[viennacl] 06/09: Reset sources
Toby St Clere Smithe
tsmithe-guest at moszumanska.debian.org
Wed Feb 19 19:09:56 UTC 2014
This is an automated email from the git hooks/post-receive script.
tsmithe-guest pushed a commit to branch master
in repository viennacl.
commit 48667a27e7ef2391ad62dec6720296cbcc7d1a70
Author: Toby Smithe <git at tsmithe.net>
Date: Wed Feb 19 16:20:47 2014 +0000
Reset sources
---
auxiliary/CMakeLists.txt | 345 ------------
auxiliary/converter.cpp | 383 -------------
auxiliary/ell_matrix/align1/vec_mul.cl | 38 --
auxiliary/generate-blas3-prod-align1.cpp | 285 ----------
auxiliary/generate-blas3-prod16-align1.cpp | 282 ----------
auxiliary/hyb_matrix/align1/vec_mul.cl | 49 --
auxiliary/matrix_col/align1/add.cl | 36 --
auxiliary/matrix_col/align1/assign.cl | 27 -
auxiliary/matrix_col/align1/clear.cl | 16 -
auxiliary/matrix_col/align1/cpu_inplace_mult.cl | 18 -
auxiliary/matrix_col/align1/inplace_add.cl | 26 -
auxiliary/matrix_col/align1/inplace_divide.cl | 19 -
auxiliary/matrix_col/align1/inplace_mult.cl | 20 -
auxiliary/matrix_col/align1/inplace_sub.cl | 27 -
auxiliary/matrix_col/align1/sub.cl | 36 --
auxiliary/matrix_col/align1/trans_vec_mul.cl | 28 -
auxiliary/matrix_col/align1/vec_mul.cl | 28 -
auxiliary/matrix_row/align1/add.cl | 37 --
auxiliary/matrix_row/align1/assign.cl | 27 -
auxiliary/matrix_row/align1/clear.cl | 16 -
auxiliary/matrix_row/align1/cpu_inplace_mult.cl | 17 -
auxiliary/matrix_row/align1/inplace_add.cl | 27 -
auxiliary/matrix_row/align1/inplace_divide.cl | 18 -
auxiliary/matrix_row/align1/inplace_mult.cl | 20 -
auxiliary/matrix_row/align1/inplace_sub.cl | 26 -
auxiliary/matrix_row/align1/sub.cl | 36 --
auxiliary/matrix_row/align1/trans_vec_mul.cl | 29 -
auxiliary/matrix_row/align1/vec_mul.cl | 30 --
auxiliary/nmf/align1/el_wise_mul_div.cl | 14 -
auxiliary/nmf/align1/el_wise_mul_div.cl~ | 13 -
auxiliary/nmf/align1/sub_wise.cl | 10 -
auxiliary/nmf/align1/sub_wise.cl~ | 13 -
auxiliary/svd/align1/bidiag_pack.cl | 19 -
auxiliary/svd/align1/copy_col.cl | 17 -
auxiliary/svd/align1/copy_row.cl | 17 -
auxiliary/svd/align1/givens_prev.cl | 59 --
auxiliary/svd/align1/house_col.cl | 59 --
auxiliary/svd/align1/house_row.cl | 71 ---
auxiliary/svd/align1/inverse_signs.cl | 16 -
auxiliary/svd/align1/transpose_inplace.cl | 25 -
auxiliary/vector/align1/add.cl | 19 -
auxiliary/vector/align1/assign.cl | 15 -
auxiliary/vector/align1/clear.cl | 11 -
auxiliary/vector/align1/cpu_inplace_mul_add.cl | 16 -
auxiliary/vector/align1/cpu_inplace_mult.cl | 12 -
auxiliary/vector/align1/cpu_mul_add.cl | 21 -
auxiliary/vector/align1/cpu_mult.cl | 17 -
auxiliary/vector/align1/diag_precond.cl | 14 -
auxiliary/vector/align1/divide.cl | 18 -
auxiliary/vector/align1/index_norm_inf.cl | 58 --
auxiliary/vector/align1/inner_prod.cl | 64 ---
auxiliary/vector/align1/inplace_add.cl | 15 -
auxiliary/vector/align1/inplace_div_add.cl | 17 -
auxiliary/vector/align1/inplace_div_sub.cl | 18 -
auxiliary/vector/align1/inplace_divide.cl | 13 -
auxiliary/vector/align1/inplace_mul_add.cl | 18 -
auxiliary/vector/align1/inplace_mul_sub.cl | 18 -
auxiliary/vector/align1/inplace_mult.cl | 14 -
auxiliary/vector/align1/inplace_sub.cl | 15 -
auxiliary/vector/align1/mul_add.cl | 23 -
auxiliary/vector/align1/mul_sub.cl | 23 -
auxiliary/vector/align1/mult.cl | 17 -
auxiliary/vector/align1/norm_1.cl | 49 --
auxiliary/vector/align1/norm_2.cl | 52 --
auxiliary/vector/align1/norm_inf.cl | 43 --
auxiliary/vector/align1/plane_rotation.cl | 28 -
auxiliary/vector/align1/sqrt_sum.cl | 22 -
auxiliary/vector/align1/sub.cl | 19 -
auxiliary/vector/align1/sum.cl | 21 -
auxiliary/vector/align1/swap.cl | 23 -
auxiliary/vector/align1/vmax.cl | 22 -
auxiliary/vector/align16/add.cl | 21 -
auxiliary/vector/align16/cpu_inplace_mul.cl | 13 -
auxiliary/vector/align16/cpu_mult.cl | 17 -
auxiliary/vector/align16/divide.cl | 20 -
auxiliary/vector/align16/inplace_add.cl | 16 -
auxiliary/vector/align16/inplace_divide.cl | 15 -
auxiliary/vector/align16/inplace_mult.cl | 14 -
auxiliary/vector/align16/inplace_sub.cl | 17 -
auxiliary/vector/align16/mult.cl | 18 -
auxiliary/vector/align16/sub.cl | 21 -
auxiliary/vector/align4/cpu_inplace_mul_add.cl | 17 -
auxiliary/vector/align4/cpu_mul_add.cl | 21 -
auxiliary/vector/align4/inplace_div_add.cl | 20 -
auxiliary/vector/align4/inplace_div_sub.cl | 20 -
auxiliary/vector/align4/inplace_mul_add.cl | 18 -
auxiliary/vector/align4/inplace_mul_sub.cl | 19 -
auxiliary/vector/align4/mul_add.cl | 22 -
examples/tutorial/iterative-ublas.cpp~ | 163 ------
tests/src/generator_inner_product.cpp | 172 ------
tests/src/generator_matrix.cpp | 219 --------
tests/src/generator_matrix_vector_product.cpp | 234 --------
tests/src/generator_vector.cpp | 331 ------------
tests/src/matrix.cpp | 533 -------------------
tests/src/matrix_range.cpp | 558 -------------------
tests/src/matrix_slice.cpp | 563 --------------------
tests/src/vector_range.cpp | 396 --------------
tests/src/vector_slice.cpp | 396 --------------
viennacl/generator/compound_node.hpp | 199 -------
viennacl/generator/custom_operation.hpp | 268 ----------
viennacl/generator/elementwise_modifier.hpp | 93 ----
viennacl/generator/get_kernels_infos.hpp | 579 --------------------
viennacl/generator/make_code/expression.hpp | 163 ------
viennacl/generator/make_code/inner_product.hpp | 131 -----
viennacl/generator/make_code/make_code.hpp | 33 --
.../generator/make_code/matrix-vector_product.hpp | 143 -----
.../generator/make_code/regular_compound_node.hpp | 104 ----
viennacl/generator/meta_tools/typelist.hpp | 386 --------------
viennacl/generator/meta_tools/utils.hpp | 290 ----------
viennacl/generator/operation_types.hpp | 130 -----
.../symbolic_types/convenience_typedef.hpp | 176 ------
.../generator/symbolic_types/symbolic_matrix.hpp | 156 ------
.../generator/symbolic_types/symbolic_scalars.hpp | 176 ------
.../generator/symbolic_types/symbolic_vector.hpp | 179 -------
viennacl/generator/tokens_management.hpp | 107 ----
.../generator/traits/general_purpose_traits.hpp | 250 ---------
viennacl/generator/traits/result_of.hpp | 591 ---------------------
viennacl/generator/tree_operations.hpp | 487 -----------------
viennacl/linalg/coordinate_matrix_operations.hpp | 222 --------
viennacl/linalg/lanczos.hpp~ | 490 -----------------
viennacl/tools/matrix_kernel_class_deducer.hpp | 73 ---
.../tools/matrix_prod_kernel_class_deducer.hpp | 171 ------
122 files changed, 12485 deletions(-)
diff --git a/auxiliary/CMakeLists.txt b/auxiliary/CMakeLists.txt
deleted file mode 100644
index 662eb98..0000000
--- a/auxiliary/CMakeLists.txt
+++ /dev/null
@@ -1,345 +0,0 @@
-include_directories(${Boost_INCLUDE_DIRS})
-
-add_executable(generate-blas3-solve-align1 generate-blas3-solve-align1.cpp)
-add_executable(generate-blas3-prod-align1 generate-blas3-prod-align1.cpp)
-add_executable(generate-blas3-prod16-align1 generate-blas3-prod16-align1.cpp)
-
-function(generate_blas3_prod_align1 outvar)
- set(crstr_0 col)
- set(crstr_1 row)
- set(ATstr_0 A)
- set(ATstr_1 T)
- set(outfiles)
-
- foreach(ar 0 1) # A is column/row major
- foreach(br 0 1) # B is column/row major
- foreach(cr 0 1) # C is column/row major
- foreach(at 0 1) # A is (not) transposed
- foreach(bt 0 1) # B is (not) transposed
- set(d "${CMAKE_CURRENT_BINARY_DIR}")
- set(d "${d}/matrix_prod_${crstr_${ar}}_${crstr_${br}}_${crstr_${cr}}")
- set(d "${d}/align1")
- file(MAKE_DIRECTORY "${d}")
-
- # standard kernels:
- set(o "${d}/prod_${ATstr_${at}}${ATstr_${bt}}.cl")
- file(RELATIVE_PATH ro "${CMAKE_CURRENT_BINARY_DIR}" "${o}")
- add_custom_command(OUTPUT "${o}"
- COMMAND generate-blas3-prod-align1
- ${ar} ${br} ${cr} ${at} ${bt} > "${o}"
- COMMENT "Generating ${ro}"
- WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
- VERBATIM)
- list(APPEND outfiles "${o}")
-
- # fast kernels:
- set(o16 "${d}/prod16_${ATstr_${at}}${ATstr_${bt}}.cl")
- file(RELATIVE_PATH ro16 "${CMAKE_CURRENT_BINARY_DIR}" "${o16}")
- add_custom_command(OUTPUT "${o16}"
- COMMAND generate-blas3-prod16-align1
- ${ar} ${br} ${cr} ${at} ${bt} > "${o16}"
- COMMENT "Generating ${ro16}"
- WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
- VERBATIM)
- list(APPEND outfiles "${o16}")
-
- endforeach()
- endforeach()
- endforeach()
- endforeach()
- endforeach()
- set(${outvar} "${outfiles}" PARENT_SCOPE)
-endfunction()
-
-function(generate_blas3_solve_align1 outvar)
- set(crstr_0 col)
- set(crstr_1 row)
- set(tstr_0)
- set(tstr_1 trans_)
- set(ulstr_0 lower)
- set(ulstr_1 upper)
- set(unitstr_0)
- set(unitstr_1 unit_)
- set(outfiles)
-
- foreach(ar 0 1) # A is column/row major
- foreach(br 0 1) # A is column/row major
- foreach(at 0 1) # A is transposed
- foreach(bt 0 1) # B is transposed
- foreach(ul 0 1) # upper/lower
- foreach(un 0 1) # unit
- set(d "${CMAKE_CURRENT_BINARY_DIR}")
- set(d "${d}/matrix_solve_${crstr_${ar}}_${crstr_${br}}")
- set(d "${d}/align1")
- file(MAKE_DIRECTORY "${d}")
- set(o "${d}/${tstr_${at}}${unitstr_${un}}${ulstr_${ul}}_${tstr_${bt}}solve.cl")
- file(RELATIVE_PATH ro "${CMAKE_CURRENT_BINARY_DIR}" "${o}")
- add_custom_command(OUTPUT "${o}"
- COMMAND generate-blas3-solve-align1
- ${ar} ${br} ${at} ${bt} ${ul} ${un} > "${o}"
- COMMENT "Generating ${ro}"
- WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
- VERBATIM)
- list(APPEND outfiles "${o}")
- endforeach()
- endforeach()
- endforeach()
- endforeach()
- endforeach()
- endforeach()
- set(${outvar} "${outfiles}" PARENT_SCOPE)
-endfunction()
-
-# Matrix-Matrix products
-generate_blas3_prod_align1(MATRIX_PROD_SRCS)
-
-# Matrix-Matrix triangular solver
-generate_blas3_solve_align1(MATRIX_SOLVE_SRCS)
-
-set(COMPRESSED_MATRIX_SRCS
- compressed_matrix/align1/bicgstab_kernel1.cl
- compressed_matrix/align1/bicgstab_kernel2.cl
- compressed_matrix/align1/jacobi.cl
- compressed_matrix/align1/jacobi_precond.cl
- compressed_matrix/align1/lu_backward.cl
- compressed_matrix/align1/lu_forward.cl
- compressed_matrix/align1/row_scaling_1.cl
- compressed_matrix/align1/row_scaling_2.cl
- compressed_matrix/align1/vec_mul.cl
- compressed_matrix/align4/vec_mul.cl
- compressed_matrix/align8/vec_mul.cl)
-
-set(COORDINATE_MATRIX_SRCS
- coordinate_matrix/align1/vec_mul.cl
- coordinate_matrix/align128/dummy)
-
-set(ELL_MATRIX_SRCS
- ell_matrix/align1/vec_mul.cl)
-
-set(HYB_MATRIX_SRCS
- hyb_matrix/align1/vec_mul.cl)
-
-set(MATRIX_COL_SRCS
- matrix_col/align1/add.cl
- matrix_col/align1/assign.cl
- matrix_col/align1/clear.cl
- matrix_col/align1/cpu_inplace_mult.cl
- matrix_col/align1/fft_direct.cl
- matrix_col/align1/fft_radix2.cl
- matrix_col/align1/fft_radix2_local.cl
- matrix_col/align1/fft_reorder.cl
- matrix_col/align1/inplace_add.cl
- matrix_col/align1/inplace_divide.cl
- matrix_col/align1/inplace_mult.cl
- matrix_col/align1/inplace_sub.cl
- matrix_col/align1/lower_triangular_substitute_inplace.cl
- matrix_col/align1/lu_factorize.cl
- matrix_col/align1/rank1_update.cl
- matrix_col/align1/scaled_rank1_update.cl
- matrix_col/align1/sub.cl
- matrix_col/align1/trans_lower_triangular_substitute_inplace.cl
- matrix_col/align1/trans_unit_lower_triangular_substitute_inplace.cl
- matrix_col/align1/trans_unit_upper_triangular_substitute_inplace.cl
- matrix_col/align1/trans_upper_triangular_substitute_inplace.cl
- matrix_col/align1/trans_vec_mul.cl
- matrix_col/align1/unit_lower_triangular_substitute_inplace.cl
- matrix_col/align1/unit_upper_triangular_substitute_inplace.cl
- matrix_col/align1/upper_triangular_substitute_inplace.cl
- matrix_col/align1/vec_mul.cl
- matrix_col/align16/dummy)
-
-set(MATRIX_ROW_SRCS
- matrix_row/align1/add.cl
- matrix_row/align1/assign.cl
- matrix_row/align1/clear.cl
- matrix_row/align1/cpu_inplace_mult.cl
- matrix_row/align1/fft_direct.cl
- matrix_row/align1/fft_radix2.cl
- matrix_row/align1/fft_radix2_local.cl
- matrix_row/align1/fft_reorder.cl
- matrix_row/align1/inplace_add.cl
- matrix_row/align1/inplace_divide.cl
- matrix_row/align1/inplace_mult.cl
- matrix_row/align1/inplace_sub.cl
- matrix_row/align1/lower_triangular_substitute_inplace.cl
- matrix_row/align1/lu_factorize.cl
- matrix_row/align1/rank1_update.cl
- matrix_row/align1/scaled_rank1_update.cl
- matrix_row/align1/sub.cl
- matrix_row/align1/trans_lower_triangular_substitute_inplace.cl
- matrix_row/align1/trans_unit_lower_triangular_substitute_inplace.cl
- matrix_row/align1/trans_unit_upper_triangular_substitute_inplace.cl
- matrix_row/align1/trans_upper_triangular_substitute_inplace.cl
- matrix_row/align1/trans_vec_mul.cl
- matrix_row/align1/unit_lower_triangular_substitute_inplace.cl
- matrix_row/align1/unit_upper_triangular_substitute_inplace.cl
- matrix_row/align1/upper_triangular_substitute_inplace.cl
- matrix_row/align1/vec_mul.cl
- matrix_row/align16/dummy)
-
-set(SCALAR_SRCS
- scalar/align1/add.cl
- scalar/align1/cpu_add.cl
- scalar/align1/cpu_div.cl
- scalar/align1/cpu_inplace_add.cl
- scalar/align1/cpu_inplace_div.cl
- scalar/align1/cpu_inplace_mul.cl
- scalar/align1/cpu_inplace_sub.cl
- scalar/align1/cpu_mul.cl
- scalar/align1/cpu_sub.cl
- scalar/align1/divide.cl
- scalar/align1/inplace_add.cl
- scalar/align1/inplace_div.cl
- scalar/align1/inplace_mul.cl
- scalar/align1/inplace_sub.cl
- scalar/align1/mul.cl
- scalar/align1/sub.cl)
-
-set(VECTOR_SRCS
- vector/align16/add.cl
- vector/align16/cpu_inplace_mul.cl
- vector/align16/cpu_mult.cl
- vector/align16/divide.cl
- vector/align16/inplace_add.cl
- vector/align16/inplace_divide.cl
- vector/align16/inplace_mult.cl
- vector/align16/inplace_sub.cl
- vector/align16/mult.cl
- vector/align16/sub.cl
- vector/align1/add.cl
- vector/align1/assign.cl
- vector/align1/clear.cl
- vector/align1/cpu_inplace_mul_add.cl
- vector/align1/cpu_inplace_mult.cl
- vector/align1/cpu_mul_add.cl
- vector/align1/cpu_mult.cl
- vector/align1/diag_precond.cl
- vector/align1/divide.cl
- vector/align1/index_norm_inf.cl
- vector/align1/inner_prod.cl
- vector/align1/inplace_add.cl
- vector/align1/inplace_div_add.cl
- vector/align1/inplace_divide.cl
- vector/align1/inplace_div_sub.cl
- vector/align1/inplace_mul_add.cl
- vector/align1/inplace_mul_sub.cl
- vector/align1/inplace_mult.cl
- vector/align1/inplace_sub.cl
- vector/align1/mul_add.cl
- vector/align1/mul_sub.cl
- vector/align1/mult.cl
- vector/align1/norm_1.cl
- vector/align1/norm_2.cl
- vector/align1/norm_inf.cl
- vector/align1/plane_rotation.cl
- vector/align1/sqrt_sum.cl
- vector/align1/sub.cl
- vector/align1/sum.cl
- vector/align1/swap.cl
- vector/align1/vmax.cl
- vector/align4/cpu_inplace_mul_add.cl
- vector/align4/cpu_mul_add.cl
- vector/align4/inplace_div_add.cl
- vector/align4/inplace_div_sub.cl
- vector/align4/inplace_mul_add.cl
- vector/align4/inplace_mul_sub.cl
- vector/align4/mul_add.cl)
-
-set(FFT_SRCS
- fft/align1/bluestein_post.cl
- fft/align1/bluestein_pre.cl
- fft/align1/complex_to_real.cl
- fft/align1/fft_div_vec_scalar.cl
- fft/align1/fft_mult_vec.cl
- fft/align1/real_to_complex.cl
- fft/align1/reverse_inplace.cl
- fft/align1/transpose.cl
- fft/align1/transpose_inplace.cl
- fft/align1/vandermonde_prod.cl
- fft/align1/zero2.cl
- )
-
-set(SVD_SRCS
- svd/align1/copy_col.cl
- svd/align1/copy_row.cl
- svd/align1/transpose_inplace.cl
- svd/align1/inverse_signs.cl
- svd/align1/givens_prev.cl
- svd/align1/bidiag_pack.cl
- svd/align1/house_col.cl
- svd/align1/house_row.cl
- )
-
-set(SPAI_SRCS
- spai/align1/assemble_blocks.cl
- spai/align1/block_bv_assembly.cl
- spai/align1/block_least_squares.cl
- spai/align1/block_q_mult.cl
- spai/align1/block_qr.cl
- spai/align1/block_qr_assembly.cl
- spai/align1/block_qr_assembly_1.cl
- spai/align1/block_r_assembly.cl
- )
-
-set(NMF_SRCS
- nmf/align1/el_wise_mul_div.cl
- nmf/align1/sub_wise.cl
- )
-
-set(CL_SRCS)
-foreach(f IN LISTS COMPRESSED_MATRIX_SRCS COORDINATE_MATRIX_SRCS ELL_MATRIX_SRCS HYB_MATRIX_SRCS
- MATRIX_COL_SRCS MATRIX_ROW_SRCS SCALAR_SRCS VECTOR_SRCS FFT_SRCS SVD_SRCS SPAI_SRCS NMF_SRCS)
- get_filename_component(d "${CMAKE_CURRENT_BINARY_DIR}/${f}" PATH)
- file(MAKE_DIRECTORY "${d}")
- configure_file(${f} "${CMAKE_CURRENT_BINARY_DIR}/${f}" COPYONLY)
- list(APPEND CL_SRCS "${CMAKE_CURRENT_BINARY_DIR}/${f}")
-endforeach()
-list(APPEND CL_SRCS ${MATRIX_PROD_SRCS} ${MATRIX_SOLVE_SRCS})
-
-add_executable(converter converter.cpp)
-target_link_libraries(converter ${Boost_LIBRARIES})
-
-set(KERNEL_HDRS)
-set(KERNEL_SRCS)
-foreach(d
- compressed_matrix
- coordinate_matrix
- ell_matrix
- hyb_matrix
- matrix_col
- matrix_prod_col_col_col
- matrix_prod_col_col_row
- matrix_prod_col_row_col
- matrix_prod_col_row_row
- matrix_prod_row_col_col
- matrix_prod_row_col_row
- matrix_prod_row_row_col
- matrix_prod_row_row_row
- matrix_row
- matrix_solve_col_col
- matrix_solve_col_row
- matrix_solve_row_col
- matrix_solve_row_row
- scalar
- vector
- fft
- svd
- spai
- nmf
- )
- set(f "${PROJECT_SOURCE_DIR}/viennacl/linalg/kernels/${d}")
- list(APPEND KERNEL_HDRS "${f}_kernels.h")
- list(APPEND KERNEL_SRCS "${f}_source.h")
-endforeach()
-
-file(MAKE_DIRECTORY "${PROJECT_SOURCE_DIR}/viennacl/linalg/kernels")
-
-add_custom_command(OUTPUT ${KERNEL_HDRS} ${KERNEL_SRCS}
- COMMAND converter
- DEPENDS ${CL_SRCS}
- WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
- COMMENT "Generating kernel headers and sources"
- VERBATIM)
-
-add_custom_target(kernels ALL
- DEPENDS ${KERNEL_HDRS} ${KERNEL_SRCS})
diff --git a/auxiliary/converter.cpp b/auxiliary/converter.cpp
deleted file mode 100644
index 03f624c..0000000
--- a/auxiliary/converter.cpp
+++ /dev/null
@@ -1,383 +0,0 @@
-/*
-* Converts OpenCL sources to header file string constants
-*/
-
-#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <string>
-
-#define BOOST_FILESYSTEM_VERSION 2
-
-#include <boost/filesystem/operations.hpp>
-#include <boost/filesystem/path.hpp>
-#include <iostream>
-
-namespace fs = boost::filesystem;
-
-void writeSourceFile(std::ofstream & out_file, std::string & filename, const char * dirname, const char * alignment)
-{
- std::string fullpath(dirname);
- fullpath += "/";
- fullpath += alignment;
- fullpath += "/";
- fullpath += filename;
- std::ifstream in_file(fullpath.c_str());
- std::string tmp;
-
- if (in_file.is_open())
- {
- //write variable declaration:
- out_file << "const char * const " << dirname << "_" << alignment << "_" << filename.substr(0, filename.size()-3) << " = " << std::endl;
-
- //write source string:
- while (getline(in_file, tmp, '\n'))
- {
- if (tmp.size() > 0)
- {
- //out_file << "\"" << tmp.replace(tmp.end()-1, tmp.end(), "\\n\"") << std::endl;
- if ( *(tmp.end()-1) == '\r') //Windows line delimiter, \r\n
- out_file << "\"" << tmp.replace(tmp.end()-1, tmp.end(), "\\n\"") << std::endl;
- else //Unix line delimiter \n
- out_file << "\"" << tmp.append("\\n\"") << std::endl;
- }
- }
- out_file << "; //" << dirname << "_" << alignment << "_" << filename.substr(0, filename.size()-3) << std::endl << std::endl;
-
- }
- else
- std::cerr << "Failed to open file " << filename << std::endl;
-}
-
-void createSourceFile(const char * dirname)
-{
- //Step 1: Open source file
- std::string header_name(dirname);
- std::ofstream source_file(("../../viennacl/linalg/kernels/" + header_name + "_source.h").c_str());
-
- //Step 2: Write source header file preamble
- std::string dirname_uppercase(dirname);
- std::transform(dirname_uppercase.begin(), dirname_uppercase.end(), dirname_uppercase.begin(), toupper);
- source_file << "#ifndef VIENNACL_LINALG_KERNELS_" << dirname_uppercase << "_SOURCE_HPP_" << std::endl;
- source_file << "#define VIENNACL_LINALG_KERNELS_" << dirname_uppercase << "_SOURCE_HPP_" << std::endl;
- source_file << "//Automatically generated file from auxiliary-directory, do not edit manually!" << std::endl;
- source_file << "namespace viennacl" << std::endl;
- source_file << "{" << std::endl;
- source_file << " namespace linalg" << std::endl;
- source_file << " {" << std::endl;
- source_file << " namespace kernels" << std::endl;
- source_file << " {" << std::endl;
-
- //Step 3: Write all OpenCL kernel sources into header file
- fs::path filepath = fs::system_complete( fs::path( dirname ) );
- if ( fs::is_directory( filepath ) )
- {
- //std::cout << "\n In directory " << filepath.directory_string() << std::endl;
-
- fs::directory_iterator end_iter;
- //write and register single precision sources:
- for ( fs::directory_iterator alignment_itr( filepath );
- alignment_itr != end_iter;
- ++alignment_itr )
- {
- if (fs::is_directory( alignment_itr->path() ))
- {
- std::cout << "\nGenerating kernels from directory " << alignment_itr->path().directory_string() << std::endl;
-
- //write and register single precision sources:
- for ( fs::directory_iterator cl_itr( alignment_itr->path() );
- cl_itr != end_iter;
- ++cl_itr )
- {
- std::string fname = cl_itr->path().filename();
- std::string alignment = alignment_itr->path().filename();
-
- size_t pos = fname.find(".cl");
- if ( pos == std::string::npos )
- continue;
-
- if (fname.substr(fname.size()-3, 3) == ".cl")
- writeSourceFile(source_file, fname, dirname, alignment.c_str());
- //std::cout << alignment_itr->path().filename() << "/" << fname << std::endl;
- } //for
- } //if is_directory
- } //for alignment_iterator
- } //if is_directory
- else
- std::cerr << "Cannot access directory " << dirname << std::endl;
-
- //Final Step: Write file tail:
- source_file << " } //namespace kernels" << std::endl;
- source_file << " } //namespace linalg" << std::endl;
- source_file << "} //namespace viennacl" << std::endl;
- source_file << "#endif" << std::endl;
- source_file.close();
-}
-
-
-unsigned int getBestKernel(const char * dirname, std::string & kernel_name, unsigned int alignment)
-{
- unsigned int search_alignment = alignment;
- //std::cout << "Searching for best match for " << kernel_name << " with alignment " << alignment << std::endl;
-
- while (search_alignment > 1)
- {
- std::ostringstream oss;
- oss << dirname << "/align" << search_alignment;
- //std::cout << "Searching " << oss.str() << std::endl;
-
- //try to find kernel in directory:
- fs::path filepath = fs::system_complete( fs::path( oss.str() ) );
- if ( fs::is_directory( filepath ) ) //directory exists?
- {
- fs::directory_iterator end_iter;
- for ( fs::directory_iterator cl_itr( filepath );
- cl_itr != end_iter;
- ++cl_itr )
- {
- std::string fname = cl_itr->path().filename();
- if (fname == kernel_name)
- {
- //std::cout << "Found matching kernel for " << kernel_name << " with alignment " << alignment << " at alignment " << search_alignment << std::endl;
- return search_alignment;
- }
- }
- }
-
- search_alignment /= 2;
- }
-
- //std::cout << "Found alignment 1 only..." << std::endl;
- //nothing found: return alignment 1:
- return 1;
-}
-
-
-void writeKernelInit(std::ostream & kernel_file, const char * dirname, std::string & subfolder, bool is_float)
-{
- //extract alignment information from subfolder string:
- std::istringstream stream(subfolder.substr(5, subfolder.size()-5));
- unsigned int alignment = 0;
- stream >> alignment;
- if (alignment == 0)
- std::cerr << "ERROR: Could not extract alignment from " << subfolder << std::endl;
-
- kernel_file << " template <>" << std::endl;
- kernel_file << " struct " << dirname;
- if (is_float)
- kernel_file << "<float, ";
- else
- kernel_file << "<double, ";
- kernel_file << alignment << ">" << std::endl;
- kernel_file << " {" << std::endl;
-
- kernel_file << " static std::string program_name()" << std::endl;
- kernel_file << " {" << std::endl;
- kernel_file << " return \"";
- if (is_float)
- kernel_file << "f";
- else
- kernel_file << "d";
- kernel_file << "_" << dirname << "_" << alignment << "\";" << std::endl;
- kernel_file << " }" << std::endl;
-
- kernel_file << " static void init()" << std::endl;
- kernel_file << " {" << std::endl;
- if (is_float)
- kernel_file << " viennacl::ocl::DOUBLE_PRECISION_CHECKER<float>::apply();" << std::endl;
- else
- kernel_file << " viennacl::ocl::DOUBLE_PRECISION_CHECKER<double>::apply();" << std::endl;
- kernel_file << " static std::map<cl_context, bool> init_done;" << std::endl;
- kernel_file << " viennacl::ocl::context & context_ = viennacl::ocl::current_context();" << std::endl;
- kernel_file << " if (!init_done[context_.handle().get()])" << std::endl;
- kernel_file << " {" << std::endl;
- kernel_file << " std::string source;" << std::endl;
- if (!is_float)
- kernel_file << " std::string fp64_ext = viennacl::ocl::current_device().double_support_extension();" << std::endl;
-
- //iterate over all kernels in align1-folder:
- std::string current_dir(dirname);
- current_dir += "/align1";
- fs::path filepath = fs::system_complete( fs::path( current_dir ) );
-
- fs::directory_iterator end_iter;
- //write and register single precision sources:
- for ( fs::directory_iterator cl_itr( filepath );
- cl_itr != end_iter;
- ++cl_itr )
- {
- std::string fname = cl_itr->path().filename();
- size_t pos = fname.find(".cl");
- if ( pos == std::string::npos )
- continue;
-
- if (fname.substr(fname.size()-3, 3) == ".cl")
- {
- //add kernel source to program string:
- kernel_file << " source.append(";
- if (!is_float)
- kernel_file << "viennacl::tools::make_double_kernel(";
- kernel_file << dirname << "_align" << getBestKernel(dirname, fname, alignment) << "_" << fname.substr(0, fname.size()-3);
- if (!is_float)
- kernel_file << ", fp64_ext)";
- kernel_file << ");" << std::endl;
- }
- } //for
-
- kernel_file << " std::string prog_name = program_name();" << std::endl;
- kernel_file << " #ifdef VIENNACL_BUILD_INFO" << std::endl;
- kernel_file << " std::cout << \"Creating program \" << prog_name << std::endl;" << std::endl;
- kernel_file << " #endif" << std::endl;
- kernel_file << " context_.add_program(source, prog_name);" << std::endl;
- kernel_file << " viennacl::ocl::program & prog_ = context_.get_program(prog_name);" << std::endl;
-
- //write and register single precision sources:
- for ( fs::directory_iterator cl_itr( filepath );
- cl_itr != end_iter;
- ++cl_itr )
- {
- std::string fname = cl_itr->path().filename();
- size_t pos = fname.find(".cl");
- if ( pos == std::string::npos )
- continue;
-
- if (fname.substr(fname.size()-3, 3) == ".cl")
- {
- //initialize kernel:
- kernel_file << " prog_.add_kernel(\"" << fname.substr(0, fname.size()-3) << "\");" << std::endl;
- }
- } //for
-
- kernel_file << " init_done[context_.handle().get()] = true;" << std::endl;
- kernel_file << " } //if" << std::endl;
- kernel_file << " } //init" << std::endl;
- kernel_file << " }; // struct" << std::endl << std::endl;
-}
-
-
-
-
-void createKernelFile(const char * dirname)
-{
- //Step 1: Open kernel file
- std::string header_name(dirname);
- std::ofstream kernel_file(("../../viennacl/linalg/kernels/" + header_name + "_kernels.h").c_str());
-
- //Step 2: Write kernel header file preamble
- std::string dirname_uppercase(dirname);
- std::transform(dirname_uppercase.begin(), dirname_uppercase.end(), dirname_uppercase.begin(), toupper);
- kernel_file << "#ifndef _VIENNACL_" << dirname_uppercase << "_KERNELS_HPP_" << std::endl;
- kernel_file << "#define _VIENNACL_" << dirname_uppercase << "_KERNELS_HPP_" << std::endl;
- kernel_file << "#include \"viennacl/tools/tools.hpp\"" << std::endl;
- kernel_file << "#include \"viennacl/ocl/kernel.hpp\"" << std::endl;
- kernel_file << "#include \"viennacl/ocl/platform.hpp\"" << std::endl;
- kernel_file << "#include \"viennacl/ocl/utils.hpp\"" << std::endl;
- kernel_file << "#include \"viennacl/linalg/kernels/" << dirname << "_source.h\"" << std::endl;
- kernel_file << std::endl;
- kernel_file << "//Automatically generated file from aux-directory, do not edit manually!" << std::endl;
- kernel_file << "namespace viennacl" << std::endl;
- kernel_file << "{" << std::endl;
- kernel_file << " namespace linalg" << std::endl;
- kernel_file << " {" << std::endl;
- kernel_file << " namespace kernels" << std::endl;
- kernel_file << " {" << std::endl;
-
- //Step 3: Write class information:
- kernel_file << " template<class TYPE, unsigned int alignment>" << std::endl;
- kernel_file << " struct " << dirname << ";" << std::endl << std::endl;
-
- //Step 4: Write single precision kernels
- std::string dir(dirname);
- kernel_file << std::endl << " /////////////// single precision kernels //////////////// " << std::endl;
- fs::path filepath = fs::system_complete( fs::path( dir ) );
- if ( fs::is_directory( filepath ) )
- {
- //std::cout << "\nIn directory: " << filepath.directory_string() << std::endl;
-
- fs::directory_iterator end_iter;
- //write and register single precision sources:
- for ( fs::directory_iterator alignment_itr( filepath );
- alignment_itr != end_iter;
- ++alignment_itr )
- {
- if (fs::is_directory( alignment_itr->path() ))
- {
- std::string subfolder = alignment_itr->path().filename();
- if( subfolder.find("align") == std::string::npos )
- continue;
- writeKernelInit(kernel_file, dirname, subfolder, true);
- } //if is_directory
- } //for alignment_iterator
- kernel_file << std::endl;
- } //if is_directory
- else
- std::cerr << "Cannot access directory " << dirname << std::endl;
-
- //Step 5: Write double precision kernels
- kernel_file << std::endl << " /////////////// double precision kernels //////////////// " << std::endl;
- filepath = fs::system_complete( fs::path( dir ) );
- if ( fs::is_directory( filepath ) )
- {
- //std::cout << "\nIn directory: " << filepath.directory_string() << std::endl;
-
- fs::directory_iterator end_iter;
- //write and register single precision sources:
- for ( fs::directory_iterator alignment_itr( filepath );
- alignment_itr != end_iter;
- ++alignment_itr )
- {
- if (fs::is_directory( alignment_itr->path() ))
- {
- std::string subfolder = alignment_itr->path().filename();
- if( subfolder.find("align") == std::string::npos )
- continue;
- writeKernelInit(kernel_file, dirname, subfolder, false);
- } //if is_directory
- } //for alignment_iterator
- kernel_file << std::endl;
- } //if is_directory
- else
- std::cerr << "Cannot access directory " << dirname << std::endl;
-
- //Final Step: Write file tail:
- kernel_file << " } //namespace kernels" << std::endl;
- kernel_file << " } //namespace linalg" << std::endl;
- kernel_file << "} //namespace viennacl" << std::endl;
- kernel_file << "#endif" << std::endl;
- kernel_file.close();
-}
-
-void createHeaders(const char * dirname)
-{
- createKernelFile(dirname);
- createSourceFile(dirname);
-}
-
-int main(int args, char * argsv[])
-{
- createHeaders("compressed_matrix");
- createHeaders("coordinate_matrix");
- createHeaders("ell_matrix");
- createHeaders("hyb_matrix");
- createHeaders("matrix_row");
- createHeaders("matrix_col");
- createHeaders("matrix_prod_row_row_row");
- createHeaders("matrix_prod_row_row_col");
- createHeaders("matrix_prod_row_col_row");
- createHeaders("matrix_prod_row_col_col");
- createHeaders("matrix_prod_col_row_row");
- createHeaders("matrix_prod_col_row_col");
- createHeaders("matrix_prod_col_col_row");
- createHeaders("matrix_prod_col_col_col");
- createHeaders("matrix_solve_col_col");
- createHeaders("matrix_solve_col_row");
- createHeaders("matrix_solve_row_col");
- createHeaders("matrix_solve_row_row");
- createHeaders("scalar");
- createHeaders("vector");
- createHeaders("fft");
- createHeaders("svd");
- createHeaders("spai");
- createHeaders("nmf");
-}
-
diff --git a/auxiliary/ell_matrix/align1/vec_mul.cl b/auxiliary/ell_matrix/align1/vec_mul.cl
deleted file mode 100644
index ba427bf..0000000
--- a/auxiliary/ell_matrix/align1/vec_mul.cl
+++ /dev/null
@@ -1,38 +0,0 @@
-
-
-__kernel void vec_mul(
- const __global int* coords,
- const __global float* elements,
- const __global const float * vector,
- __global float * result,
- const unsigned int row_num,
- const unsigned int col_num,
- const unsigned int internal_row_num,
- const unsigned int items_per_row,
- const unsigned int aligned_items_per_row
- )
-{
- uint glb_id = get_global_id(0);
- uint glb_sz = get_global_size(0);
-
- for(uint row_id = glb_id; row_id < row_num; row_id += glb_sz)
- {
- float sum = 0;
-
- uint offset = row_id;
- for(uint item_id = 0; item_id < items_per_row; item_id++, offset += internal_row_num)
- {
- float val = elements[offset];
-
-
- if(val != 0.0f)
- {
- int col = coords[offset];
- sum += (vector[col] * val);
- }
-
- }
-
- result[row_id] = sum;
- }
-}
\ No newline at end of file
diff --git a/auxiliary/generate-blas3-prod-align1.cpp b/auxiliary/generate-blas3-prod-align1.cpp
deleted file mode 100755
index c15d89f..0000000
--- a/auxiliary/generate-blas3-prod-align1.cpp
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
-* Generates BLAS level 3 routines
-*/
-
-#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <string>
-
-#include <iostream>
-#include <stdlib.h>
-
-//generate code for C = op1(A) * op2(B), where A, B, C can have different storage layouts and opX(D) = D or trans(D)
-void printMatrixMatrixProduct(bool row_major_A, bool row_major_B, bool row_major_C,
- bool transpose_A, bool transpose_B)
-{
- //write header:
- std::cout << "// file automatically generated - do not edit!" << std::endl;
- std::cout << "// matrix-matrix multiplication C = ";
- if (transpose_A)
- std::cout << "A^T * ";
- else
- std::cout << "A * ";
- if (transpose_B)
- std::cout << "B^T" << std::endl;
- else
- std::cout << "B" << std::endl;
- std::cout << "// matrix layouts: ";
- if (row_major_C)
- std::cout << "C...row_major, ";
- else
- std::cout << "C...col_major, ";
- if (row_major_A)
- std::cout << "A...row_major, ";
- else
- std::cout << "A...col_major, ";
- if (row_major_B)
- std::cout << "B...row_major" << std::endl;
- else
- std::cout << "B...col_major" << std::endl;
-
- //start OpenCL code:
- std::cout << "__kernel void prod_";
- if (transpose_A)
- std::cout << "T";
- else
- std::cout << "A";
- if (transpose_B)
- std::cout << "T";
- else
- std::cout << "A";
-
- std::cout << "(" << std::endl;
- std::cout << " float alpha," << std::endl;
- std::cout << " __global const float * A," << std::endl;
- std::cout << " unsigned int A_row_start," << std::endl;
- std::cout << " unsigned int A_col_start," << std::endl;
- std::cout << " unsigned int A_row_inc," << std::endl;
- std::cout << " unsigned int A_col_inc," << std::endl;
- std::cout << " unsigned int A_row_size," << std::endl; //number of elements starting from row_start!
- std::cout << " unsigned int A_col_size," << std::endl;
- std::cout << " unsigned int A_internal_rows," << std::endl;
- std::cout << " unsigned int A_internal_cols," << std::endl;
- std::cout << " __global const float * B, " << std::endl;
- std::cout << " unsigned int B_row_start," << std::endl;
- std::cout << " unsigned int B_col_start," << std::endl;
- std::cout << " unsigned int B_row_inc," << std::endl;
- std::cout << " unsigned int B_col_inc," << std::endl;
- std::cout << " unsigned int B_row_size," << std::endl;
- std::cout << " unsigned int B_col_size," << std::endl;
- std::cout << " unsigned int B_internal_rows," << std::endl;
- std::cout << " unsigned int B_internal_cols," << std::endl;
- std::cout << " float beta," << std::endl;
- std::cout << " __global float * C," << std::endl;
- std::cout << " unsigned int C_row_start," << std::endl;
- std::cout << " unsigned int C_col_start," << std::endl;
- std::cout << " unsigned int C_row_inc," << std::endl;
- std::cout << " unsigned int C_col_inc," << std::endl;
- std::cout << " unsigned int C_row_size," << std::endl;
- std::cout << " unsigned int C_col_size," << std::endl;
- std::cout << " unsigned int C_internal_rows," << std::endl;
- std::cout << " unsigned int C_internal_cols) " << std::endl;
- std::cout << "{ " << std::endl;
- std::cout << std::endl;
- std::cout << " __local float bufA[" << 16 * 17 << "];" << std::endl;
- std::cout << " __local float bufB[" << 16 * 17 << "];" << std::endl;
- std::cout << std::endl;
- //do not forgot to change block_size !!!
- std::cout << " size_t block_size = 16;//get_local_size(0);" << std::endl;
- std::cout << " size_t row_block_id = get_group_id(0);" << std::endl;
- std::cout << " size_t col_block_id = get_group_id(1);" << std::endl;
- std::cout << " size_t row_thread_id = get_local_id(0);" << std::endl;
- std::cout << " size_t col_thread_id = get_local_id(1);" << std::endl;
-
- //traverse block row of A (taking mem layout and transpose operation into account)
- if (row_major_A && transpose_A)
- {
- std::cout << " size_t aBegin = (row_block_id * block_size * A_col_inc + A_col_start) + A_row_start * A_internal_cols;" << std::endl;
- std::cout << " size_t aStep = block_size * A_row_inc * A_internal_cols;" << std::endl;
- }
- else if (row_major_A && !transpose_A)
- {
- std::cout << " size_t aBegin = (row_block_id * block_size * A_row_inc + A_row_start) * A_internal_cols + A_col_start;" << std::endl;
- std::cout << " size_t aStep = block_size * A_col_inc;" << std::endl;
- }
- else if (!row_major_A && transpose_A)
- {
- std::cout << " size_t aBegin = (row_block_id * block_size * A_col_inc + A_col_start) * A_internal_rows + A_row_start;" << std::endl;
- std::cout << " size_t aStep = block_size * A_row_inc;" << std::endl;
- }
- else if (!row_major_A && !transpose_A)
- {
- std::cout << " size_t aBegin = (row_block_id * block_size * A_row_inc + A_row_start) + A_col_start * A_internal_rows;" << std::endl;
- std::cout << " size_t aStep = block_size * A_col_inc * A_internal_rows;" << std::endl;
- }
-
-
- if (row_major_B && transpose_B)
- {
- std::cout << " size_t bBegin = (col_block_id * block_size * B_row_inc + B_row_start) * B_internal_cols + B_col_start;" << std::endl;
- std::cout << " size_t bStep = block_size * B_col_inc;" << std::endl;
- }
- else if (row_major_B && !transpose_B)
- {
- std::cout << " size_t bBegin = (col_block_id * block_size * B_col_inc + B_col_start) + B_row_start * B_internal_cols;" << std::endl;
- std::cout << " size_t bStep = block_size * B_internal_cols * B_row_inc;" << std::endl;
- }
- else if (!row_major_B && transpose_B)
- {
- std::cout << " size_t bBegin = (col_block_id * block_size * B_row_inc + B_row_start) + B_col_start * B_internal_rows;" << std::endl;
- std::cout << " size_t bStep = block_size * B_internal_rows * B_col_inc;" << std::endl;
- }
- else if (!row_major_B && !transpose_B)
- {
- std::cout << " size_t bBegin = (col_block_id * block_size * B_col_inc + B_col_start) * B_internal_rows + B_row_start;" << std::endl;
- std::cout << " size_t bStep = block_size * B_row_inc;" << std::endl;
- }
-
-
- if (transpose_A)
- std::cout << " size_t block_num = (A_row_size + block_size - 1) / block_size;" << std::endl;
- else
- std::cout << " size_t block_num = (A_col_size + block_size - 1) / block_size;" << std::endl;
-
- std::cout << " float Csub = 0;" << std::endl;
-
- //offset of the the memory access by the thread relative to the beginning of the block:
- if (row_major_A)
- std::cout << " size_t aOffset = row_thread_id * A_row_inc + col_thread_id * A_col_inc * A_internal_cols;" << std::endl;
- else
- std::cout << " size_t aOffset = row_thread_id * A_row_inc + col_thread_id * A_col_inc * A_internal_rows;" << std::endl;
-
- if (row_major_B)
- std::cout << " size_t bOffset = row_thread_id * B_row_inc + col_thread_id * B_col_inc * B_internal_cols;" << std::endl;
- else
- std::cout << " size_t bOffset = row_thread_id * B_row_inc + col_thread_id * B_col_inc * B_internal_rows;" << std::endl;
-
- std::cout << std::endl;
-
- std::cout << " size_t row_thread_id_times_block_size = row_thread_id * (block_size + 1);" << std::endl;
- std::cout << " size_t col_thread_id_times_block_size = col_thread_id * (block_size + 1);" << std::endl;
-
- std::cout << " for (size_t block = 0;" << std::endl;
- std::cout << " block < block_num;" << std::endl;
- std::cout << " ++block)" << std::endl;
- std::cout << " {" << std::endl;
-
- //read block from A and check for access within matrix:
-/* if (transpose_A)
- std::cout << " if (block * block_size + col_thread_id < A_rows && get_global_id(0) < A_cols)" << std::endl;
- else
- std::cout << " if (block * block_size + col_thread_id < A_cols && get_global_id(0) < A_rows)" << std::endl;
-
- std::cout << " bufA[row_thread_id * block_size + col_thread_id] = A[aBegin + aOffset];" << std::endl;
- std::cout << " else" << std::endl;
- std::cout << " bufA[row_thread_id * block_size + col_thread_id] = 0;" << std::endl;*/
-
- if (transpose_A && row_major_A)
- std::cout << " bufA[row_thread_id_times_block_size + col_thread_id] = ((block * block_size + col_thread_id < A_row_size) && (row_block_id * block_size + row_thread_id < A_col_size)) ? A[aBegin + aOffset] : 0;" << std::endl;
- else if (transpose_A && !row_major_A)
- std::cout << " bufA[col_thread_id_times_block_size + row_thread_id] = ((block * block_size + row_thread_id < A_row_size) && (row_block_id * block_size + col_thread_id < A_col_size)) ? A[aBegin + aOffset] : 0;" << std::endl;
- else if (!transpose_A && row_major_A)
- std::cout << " bufA[col_thread_id_times_block_size + row_thread_id] = ((block * block_size + row_thread_id < A_col_size) && (row_block_id * block_size + col_thread_id < A_row_size)) ? A[aBegin + aOffset] : 0;" << std::endl;
- else if (!transpose_A && !row_major_A)
- std::cout << " bufA[row_thread_id_times_block_size + col_thread_id] = ((block * block_size + col_thread_id < A_col_size) && (row_block_id * block_size + row_thread_id < A_row_size)) ? A[aBegin + aOffset] : 0;" << std::endl;
-
-
- if (transpose_B && row_major_B)
- std::cout << " bufB[col_thread_id_times_block_size + row_thread_id] = ((block * block_size + row_thread_id < B_col_size) && (col_block_id * block_size + col_thread_id < B_row_size)) ? B[bBegin + bOffset] : 0;" << std::endl;
- else if (transpose_B && !row_major_B)
- std::cout << " bufB[row_thread_id_times_block_size + col_thread_id] = ((block * block_size + col_thread_id < B_col_size) && (col_block_id * block_size + row_thread_id < B_row_size)) ? B[bBegin + bOffset] : 0;" << std::endl;
- else if (!transpose_B && row_major_B)
- std::cout << " bufB[row_thread_id_times_block_size + col_thread_id] = ((block * block_size + col_thread_id < B_row_size) && (col_block_id * block_size + row_thread_id < B_col_size)) ? B[bBegin + bOffset] : 0;" << std::endl;
- else if (!transpose_B && !row_major_B)
- std::cout << " bufB[col_thread_id_times_block_size + row_thread_id] = ((block * block_size + row_thread_id < B_row_size) && (col_block_id * block_size + col_thread_id < B_col_size)) ? B[bBegin + bOffset] : 0;" << std::endl;
-
- //computation of block-matrix-matrix product is the same for all cases:
- std::cout << " barrier(CLK_LOCAL_MEM_FENCE);" << std::endl;
- //std::cout << " for (size_t k = 0; k < block_size; ++k)" << std::endl;
- //std::cout << " Csub += bufA[row_thread_id_times_block_size + k] * bufB[k * block_size + col_thread_id];" << std::endl;
- //loop unrolling:
- std::cout << " __local float * bufAptr = bufA + row_thread_id_times_block_size;" << std::endl;
- std::cout << " __local float * bufBptr = bufB + col_thread_id_times_block_size;" << std::endl;
- //std::cout << " Csub += bufA[row_thread_id_times_block_size] * bufB[col_thread_id * block_size];" << std::endl;
- // code in following line depends on block size and must be changed in case of block_size changes
- std::cout << " for(int i = 0; i < 4; i++) {" << std::endl;
- for (size_t unroll = 0; unroll < 4; ++unroll) {
- std::cout << " Csub += (*bufAptr) * (*bufBptr); ++bufAptr; ++bufBptr;" << std::endl;
- }
- std::cout << " }" << std::endl;
- //std::cout << " Csub += bufAptr[" << i << "] * bufB[" << i << " + col_thread_id * block_size];" << std::endl;
- //std::cout << " Csub += bufAptr[" << i << "] * bufB[" << i << " * block_size + col_thread_id];" << std::endl;
- //std::cout << " Csub += bufAptr[" << i << "] * bufB[" << i << "];" << std::endl;
- std::cout << " barrier(CLK_LOCAL_MEM_FENCE);" << std::endl;
- std::cout << " aBegin += aStep;" << std::endl;
- std::cout << " bBegin += bStep;" << std::endl;
- std::cout << " }" << std::endl;
-
-
- if (transpose_A)
- std::cout << " if (get_global_id(0) < A_col_size && ";
- else
- std::cout << " if (get_global_id(0) < A_row_size && ";
-
- if (transpose_B)
- std::cout << "get_global_id(1) < B_row_size)" << std::endl;
- else
- std::cout << "get_global_id(1) < B_col_size)" << std::endl;
-
- if (row_major_C)
- std::cout << " C[(get_global_id(0) * C_row_inc + C_row_start) * C_internal_cols + get_global_id(1) * C_col_inc + C_col_start] = alpha * Csub + beta * C[(get_global_id(0) * C_row_inc + C_row_start) * C_internal_cols + get_global_id(1) * C_col_inc + C_col_start];" << std::endl;
- else
- std::cout << " C[get_global_id(0) * C_row_inc + C_row_start + (get_global_id(1) * C_col_inc + C_col_start) * C_internal_rows] = alpha * Csub + beta * C[get_global_id(0) * C_row_inc + C_row_start + (get_global_id(1) * C_col_inc + C_col_start) * C_internal_rows];" << std::endl;
- std::cout << "}" << std::endl;
-
-}
-
-void printUsage()
-{
- std::cout << "Must have five parameters for C = A * B:" << std::endl;
- std::cout << " 0/1 : storage layout for A (column_major/row_major)" << std::endl;
- std::cout << " 0/1 : storage layout for B (column_major/row_major)" << std::endl;
- std::cout << " 0/1 : storage layout for C (column_major/row_major)" << std::endl;
- std::cout << " 0/1 : transpose for A (no/yes)" << std::endl;
- std::cout << " 0/1 : transpose for B (no/yes)" << std::endl;
-}
-
-void readParameter(bool & param, char input)
-{
- if (input == '0')
- param = false;
- else if (input == '1')
- param = true;
- else
- {
- printUsage();
- exit(EXIT_FAILURE);
- }
-}
-
-int main(int args, char * argsv[])
-{
- if (args != 6)
- {
- printUsage();
- exit(EXIT_FAILURE);
- }
-
- //the following flags are 'true' for row_major layout
- bool layout_A;
- bool layout_B;
- bool layout_C;
-
- readParameter(layout_A, argsv[1][0]);
- readParameter(layout_B, argsv[2][0]);
- readParameter(layout_C, argsv[3][0]);
-
- bool transpose_A;
- bool transpose_B;
- readParameter(transpose_A, argsv[4][0]);
- readParameter(transpose_B, argsv[5][0]);
-
-
- printMatrixMatrixProduct(layout_A, layout_B, layout_C, transpose_A, transpose_B);
-}
diff --git a/auxiliary/generate-blas3-prod16-align1.cpp b/auxiliary/generate-blas3-prod16-align1.cpp
deleted file mode 100644
index 78d21b3..0000000
--- a/auxiliary/generate-blas3-prod16-align1.cpp
+++ /dev/null
@@ -1,282 +0,0 @@
-/*
-* Generates BLAS level 3 routines
-*/
-
-#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <string>
-
-#include <iostream>
-#include <stdlib.h>
-
-//generate code for C = alpha * op1(A) * op2(B) + beta * C, where A, B, C can have different storage layouts and opX(D) = D or trans(D)
-void printMatrixMatrixProduct(bool row_major_A, bool row_major_B, bool row_major_C,
- bool transpose_A, bool transpose_B)
-{
- std::size_t vector_size = 4;
- std::size_t block_size = 16;
-
- //write header:
- std::cout << "// file automatically generated - do not edit!" << std::endl;
- std::cout << "// matrix-matrix multiplication C = ";
- if (transpose_A)
- std::cout << "A^T * ";
- else
- std::cout << "A * ";
- if (transpose_B)
- std::cout << "B^T" << std::endl;
- else
- std::cout << "B" << std::endl;
- std::cout << "// matrix layouts: ";
- if (row_major_C)
- std::cout << "C...row_major, ";
- else
- std::cout << "C...col_major, ";
- if (row_major_A)
- std::cout << "A...row_major, ";
- else
- std::cout << "A...col_major, ";
- if (row_major_B)
- std::cout << "B...row_major" << std::endl;
- else
- std::cout << "B...col_major" << std::endl;
-
- //start OpenCL code:
- std::cout << "__kernel void prod16_";
- if (transpose_A)
- std::cout << "T";
- else
- std::cout << "A";
- if (transpose_B)
- std::cout << "T";
- else
- std::cout << "A";
-
- std::cout << "(" << std::endl;
- std::cout << " float alpha," << std::endl;
- std::cout << " __global const float * A," << std::endl;
- std::cout << " unsigned int A_row_start," << std::endl;
- std::cout << " unsigned int A_col_start," << std::endl;
- std::cout << " unsigned int A_row_inc," << std::endl;
- std::cout << " unsigned int A_col_inc," << std::endl;
- std::cout << " unsigned int A_row_size," << std::endl; //number of elements starting from row_start, using an increment of A_row_inc
- std::cout << " unsigned int A_col_size," << std::endl;
- std::cout << " unsigned int A_internal_rows," << std::endl;
- std::cout << " unsigned int A_internal_cols," << std::endl;
- std::cout << " __global const float * B, " << std::endl;
- std::cout << " unsigned int B_row_start," << std::endl;
- std::cout << " unsigned int B_col_start," << std::endl;
- std::cout << " unsigned int B_row_inc," << std::endl;
- std::cout << " unsigned int B_col_inc," << std::endl;
- std::cout << " unsigned int B_row_size," << std::endl;
- std::cout << " unsigned int B_col_size," << std::endl;
- std::cout << " unsigned int B_internal_rows," << std::endl;
- std::cout << " unsigned int B_internal_cols," << std::endl;
- std::cout << " float beta," << std::endl;
- std::cout << " __global float * C," << std::endl;
- std::cout << " unsigned int C_row_start," << std::endl;
- std::cout << " unsigned int C_col_start," << std::endl;
- std::cout << " unsigned int C_row_inc," << std::endl;
- std::cout << " unsigned int C_col_inc," << std::endl;
- std::cout << " unsigned int C_row_size," << std::endl;
- std::cout << " unsigned int C_col_size," << std::endl;
- std::cout << " unsigned int C_internal_rows," << std::endl;
- std::cout << " unsigned int C_internal_cols) " << std::endl;
- std::cout << "{ " << std::endl;
- //do not forgot to change block_size !!!
- std::cout << " size_t row_block_id = get_group_id(1);" << std::endl; //refers to the row index in op(A), op(B)
- std::cout << " size_t col_block_id = get_group_id(0);" << std::endl; //refers to the col index in op(A), op(B)
- std::cout << " size_t row_thread_id = get_local_id(1);" << std::endl;
- std::cout << " size_t col_thread_id = get_local_id(0);" << std::endl;
- std::cout << std::endl;
- std::cout << " __local float As[" << block_size * block_size << "];" << std::endl;
- std::cout << std::endl;
- std::cout << " float cv[" << block_size << "] = {";
- for (std::size_t i=0; i<block_size-1; ++i)
- std::cout << "0,";
- std::cout << "0};" << std::endl;
-
- //traverse block row of A (taking mem layout and transpose operation into account)
- if (row_major_A && transpose_A)
- {
- std::cout << " size_t aBegin = (row_block_id * " << block_size << " * A_col_inc + A_col_start) + A_row_start * A_internal_cols;" << std::endl;
- std::cout << " size_t aStep = " << block_size << " * A_internal_cols * A_row_inc;" << std::endl;
- std::cout << " size_t aEnd = aBegin + A_internal_cols * A_row_inc * A_row_size;" << std::endl;
- }
- else if (row_major_A && !transpose_A)
- {
- std::cout << " size_t aBegin = (row_block_id * " << block_size << " * A_row_inc + A_row_start) * A_internal_cols + A_col_start;" << std::endl;
- std::cout << " size_t aStep = " << block_size << " * A_col_inc;" << std::endl;
- std::cout << " size_t aEnd = aBegin + A_col_inc * A_col_size;" << std::endl;
- }
- else if (!row_major_A && transpose_A)
- {
- std::cout << " size_t aBegin = (row_block_id * " << block_size << " * A_col_inc + A_col_start) * A_internal_rows + A_row_start;" << std::endl;
- std::cout << " size_t aStep = " << block_size << " * A_row_inc;" << std::endl;
- std::cout << " size_t aEnd = aBegin + A_row_inc * A_row_size;" << std::endl;
- }
- else if (!row_major_A && !transpose_A)
- {
- std::cout << " size_t aBegin = (row_block_id * " << block_size << " * A_row_inc + A_row_start) + A_col_start * A_internal_rows;" << std::endl;
- std::cout << " size_t aStep = " << block_size << " * A_internal_rows * A_col_inc;" << std::endl;
- std::cout << " size_t aEnd = aBegin + A_internal_rows * A_col_inc * A_col_size;" << std::endl;
- }
-
-
- if (row_major_B && transpose_B)
- {
- std::cout << " size_t bBegin = (col_block_id * " << block_size * vector_size << " * B_row_inc + B_row_start) * B_internal_cols + B_col_start;" << std::endl;
- std::cout << " size_t bStep = " << block_size << " * B_col_inc;" << std::endl;
- }
- else if (row_major_B && !transpose_B)
- {
- std::cout << " size_t bBegin = (col_block_id * " << block_size * vector_size << " * B_col_inc + B_col_start) + B_row_start * B_internal_cols;" << std::endl;
- std::cout << " size_t bStep = " << block_size << " * B_row_inc * B_internal_cols;" << std::endl;
- }
- else if (!row_major_B && transpose_B)
- {
- std::cout << " size_t bBegin = (col_block_id * " << block_size * vector_size << " * B_row_inc + B_row_start) + B_col_start * B_internal_rows;" << std::endl;
- std::cout << " size_t bStep = " << block_size << " * B_col_inc * B_internal_rows;" << std::endl;
- }
- else if (!row_major_B && !transpose_B)
- {
- std::cout << " size_t bBegin = (col_block_id * " << block_size * vector_size << " * B_col_inc + B_col_start) * B_internal_rows + B_row_start;" << std::endl;
- std::cout << " size_t bStep = " << block_size << " * B_row_inc;" << std::endl;
- }
-
- std::cout << " for(size_t a = aBegin, b = bBegin; a < aEnd; a += aStep, b += bStep) { " << std::endl;
-
- // copy blocks of op(A) to shared memory (op(A) is column-major in shared memory then)
- std::cout << " for(size_t i = 0; i < " << vector_size << "; i++) " << std::endl;
- if (row_major_A && transpose_A)
- std::cout << " As[ (i*" << vector_size << " + row_thread_id) + " << block_size << " * col_thread_id] = (A[a + A_col_inc * (i * " << vector_size << " + row_thread_id) + A_internal_cols * A_row_inc * col_thread_id]);" << std::endl;
- else if (row_major_A && !transpose_A)
- std::cout << " As[ (i*" << vector_size << " + row_thread_id) + " << block_size << " * col_thread_id] = (A[a + A_internal_cols * A_row_inc * (i * " << vector_size << " + row_thread_id) + A_col_inc * col_thread_id]);" << std::endl;
- else if (!row_major_A && transpose_A)
- std::cout << " As[ (i*" << vector_size << " + row_thread_id) + " << block_size << " * col_thread_id] = (A[a + A_internal_rows * A_col_inc * (i * " << vector_size << " + row_thread_id) + A_row_inc * col_thread_id]);" << std::endl;
- else if (!row_major_A && !transpose_A)
- std::cout << " As[ (i*" << vector_size << " + row_thread_id) + " << block_size << " * col_thread_id] = (A[a + A_row_inc * (i * " << vector_size << " + row_thread_id) + A_internal_rows * A_col_inc * col_thread_id]);" << std::endl;
- std::cout << std::endl;
- std::cout << " barrier(CLK_LOCAL_MEM_FENCE); " << std::endl;
-
- // initialize memory pointers
- std::cout << std::endl;
- std::cout << " __local float *ap = As; " << std::endl;
- if (row_major_B && transpose_B)
- std::cout << " __global float *bp = B + (b + (" << block_size << " * row_thread_id + col_thread_id) * B_row_inc * B_internal_cols); " << std::endl;
- else if (row_major_B && !transpose_B)
- std::cout << " __global float *bp = B + (b + (" << block_size << " * row_thread_id + col_thread_id) * B_col_inc); " << std::endl;
- else if (!row_major_B && transpose_B)
- std::cout << " __global float *bp = B + (b + (" << block_size << " * row_thread_id + col_thread_id) * B_row_inc); " << std::endl;
- else if (!row_major_B && !transpose_B)
- std::cout << " __global float *bp = B + (b + (" << block_size << " * row_thread_id + col_thread_id) * B_col_inc * B_internal_rows); " << std::endl;
- std::cout << std::endl;
-
- // run computations
- std::cout << " for(size_t i = 0; i < " << block_size << "; i++) { " << std::endl;
- if (row_major_B && transpose_B)
- std::cout << " float bv = bp[i]; " << std::endl;
- else if (row_major_B && !transpose_B)
- std::cout << " float bv = bp[i * B_internal_cols]; " << std::endl;
- else if (!row_major_B && transpose_B)
- std::cout << " float bv = bp[i * B_internal_rows]; " << std::endl;
- else if (!row_major_B && !transpose_B)
- std::cout << " float bv = bp[i]; " << std::endl;
- std::cout << std::endl;
- std::cout << " for(size_t k = 0; k < " << block_size << "; k++) " << std::endl;
- std::cout << " cv[k] += ap[k] * bv; " << std::endl;
- std::cout << std::endl;
- std::cout << " ap += " << block_size << "; " << std::endl;
- std::cout << " } " << std::endl;
- std::cout << std::endl;
- std::cout << " barrier(CLK_LOCAL_MEM_FENCE); " << std::endl;
- std::cout << " } " << std::endl;
-
- // write to C
- if (row_major_C)
- {
- std::cout << " int c = C_internal_cols * (C_row_inc * " << block_size << " * row_block_id + C_row_start) + " //block row index
- << vector_size * block_size << " * C_col_inc * col_block_id + C_col_start " << std::endl; //block column index
- std::cout << " + C_col_inc * (" << block_size << " * row_thread_id + col_thread_id); " << std::endl;
- }
- else
- {
- std::cout << " int c = C_row_inc * " << block_size << " * row_block_id + C_row_start + (" // block row index
- << vector_size * block_size << " * C_col_inc * col_block_id + C_col_start) * C_internal_rows " << std::endl; // block column index
- std::cout << " + C_internal_rows * C_col_inc * (" << block_size << " * row_thread_id + col_thread_id); " << std::endl;
- }
-
- std::cout << " for(size_t i = 0; i < " << block_size << "; i++) { " << std::endl;
-
- if (row_major_C)
- {
- std::cout << " C[c] = alpha * cv[i] + beta * C[c]; " << std::endl;
- std::cout << " c += C_internal_cols * C_row_inc; " << std::endl;
- }
- else
- {
- std::cout << " C[c] = alpha * cv[i] + beta * C[c]; " << std::endl;
- std::cout << " c += C_row_inc; " << std::endl;
- }
-
- std::cout << " } " << std::endl;
- std::cout << "} " << std::endl;
-
-
-
-// if (row_major_C)
-// std::cout << " C[(get_global_id(0) * C_row_inc + C_row_start) * C_internal_cols + get_global_id(1) * C_col_inc + C_col_start] = Csub;" << std::endl;
-// else
-// std::cout << " C[get_global_id(0) * C_row_inc + C_row_start + (get_global_id(1) * C_col_inc + C_col_start) * C_internal_rows] = Csub;" << std::endl;
-
-}
-
-void printUsage()
-{
- std::cout << "Must have five parameters for C = A * B:" << std::endl;
- std::cout << " 0/1 : storage layout for A (column_major/row_major)" << std::endl;
- std::cout << " 0/1 : storage layout for B (column_major/row_major)" << std::endl;
- std::cout << " 0/1 : storage layout for C (column_major/row_major)" << std::endl;
- std::cout << " 0/1 : transpose for A (no/yes)" << std::endl;
- std::cout << " 0/1 : transpose for B (no/yes)" << std::endl;
-}
-
-void readParameter(bool & param, char input)
-{
- if (input == '0')
- param = false;
- else if (input == '1')
- param = true;
- else
- {
- printUsage();
- exit(EXIT_FAILURE);
- }
-}
-
-int main(int args, char * argsv[])
-{
- if (args != 6)
- {
- printUsage();
- exit(EXIT_FAILURE);
- }
-
- //the following flags are 'true' for row_major layout
- bool layout_A;
- bool layout_B;
- bool layout_C;
-
- readParameter(layout_A, argsv[1][0]);
- readParameter(layout_B, argsv[2][0]);
- readParameter(layout_C, argsv[3][0]);
-
- bool transpose_A;
- bool transpose_B;
- readParameter(transpose_A, argsv[4][0]);
- readParameter(transpose_B, argsv[5][0]);
-
-
- printMatrixMatrixProduct(layout_A, layout_B, layout_C, transpose_A, transpose_B);
-}
diff --git a/auxiliary/hyb_matrix/align1/vec_mul.cl b/auxiliary/hyb_matrix/align1/vec_mul.cl
deleted file mode 100644
index b921c75..0000000
--- a/auxiliary/hyb_matrix/align1/vec_mul.cl
+++ /dev/null
@@ -1,49 +0,0 @@
-
-__kernel void vec_mul(
- const __global int* ell_coords,
- const __global float* ell_elements,
- const __global uint* csr_rows,
- const __global uint* csr_cols,
- const __global float* csr_elements,
- const __global float * vector,
-
- __global float * result,
-
- unsigned int row_num,
- unsigned int internal_row_num,
- unsigned int items_per_row,
- unsigned int aligned_items_per_row
- )
-{
- uint glb_id = get_global_id(0);
- uint glb_sz = get_global_size(0);
-
- for(uint row_id = glb_id; row_id < row_num; row_id += glb_sz)
- {
- float sum = 0;
-
- uint offset = row_id;
- for(uint item_id = 0; item_id < items_per_row; item_id++, offset += internal_row_num)
- {
- float val = ell_elements[offset];
-
-
- if(val != 0.0f)
- {
- int col = ell_coords[offset];
- sum += (vector[col] * val);
- }
-
- }
-
- uint col_begin = csr_rows[row_id];
- uint col_end = csr_rows[row_id + 1];
-
- for(uint item_id = col_begin; item_id < col_end; item_id++)
- {
- sum += (vector[csr_cols[item_id]] * csr_elements[item_id]);
- }
-
- result[row_id] = sum;
- }
-}
\ No newline at end of file
diff --git a/auxiliary/matrix_col/align1/add.cl b/auxiliary/matrix_col/align1/add.cl
deleted file mode 100644
index 89ed2ee..0000000
--- a/auxiliary/matrix_col/align1/add.cl
+++ /dev/null
@@ -1,36 +0,0 @@
-
-__kernel void add( // C = A + B
- __global const float * A,
- unsigned int A_row_start,
- unsigned int A_col_start,
- unsigned int A_row_inc,
- unsigned int A_col_inc,
- unsigned int A_row_size,
- unsigned int A_col_size,
- unsigned int A_internal_rows,
- unsigned int A_internal_cols,
- __global const float * B,
- unsigned int B_row_start,
- unsigned int B_col_start,
- unsigned int B_row_inc,
- unsigned int B_col_inc,
- unsigned int B_row_size,
- unsigned int B_col_size,
- unsigned int B_internal_rows,
- unsigned int B_internal_cols,
- __global float * C,
- unsigned int C_row_start,
- unsigned int C_col_start,
- unsigned int C_row_inc,
- unsigned int C_col_inc,
- unsigned int C_row_size,
- unsigned int C_col_size,
- unsigned int C_internal_rows,
- unsigned int C_internal_cols)
-{
- for (unsigned int i = get_global_id(0); i < A_row_size; i += get_global_size(0))
- for (unsigned int j = get_global_id(1); j < A_col_size; j += get_global_size(1))
- C[i * C_row_inc + C_row_start + (j* C_col_inc + C_col_start) * C_internal_rows] =
- A[i * A_row_inc + A_row_start + (j * A_col_inc + A_col_start) * A_internal_rows]
- + B[i * B_row_inc + B_row_start + (j * B_col_inc + B_col_start) * B_internal_rows];
-}
diff --git a/auxiliary/matrix_col/align1/assign.cl b/auxiliary/matrix_col/align1/assign.cl
deleted file mode 100644
index c586785..0000000
--- a/auxiliary/matrix_col/align1/assign.cl
+++ /dev/null
@@ -1,27 +0,0 @@
-
-__kernel void assign( // A <- B
- __global float * A,
- unsigned int A_row_start,
- unsigned int A_col_start,
- unsigned int A_row_inc,
- unsigned int A_col_inc,
- unsigned int A_row_size,
- unsigned int A_col_size,
- unsigned int A_internal_rows,
- unsigned int A_internal_cols,
- __global const float * B,
- unsigned int B_row_start,
- unsigned int B_col_start,
- unsigned int B_row_inc,
- unsigned int B_col_inc,
- unsigned int B_row_size,
- unsigned int B_col_size,
- unsigned int B_internal_rows,
- unsigned int B_internal_cols)
-{
- for (unsigned int i = get_global_id(0); i < A_row_size; i += get_global_size(0))
- for (unsigned int j = get_global_id(1); j < A_col_size; j += get_global_size(1))
- A[i * A_row_inc + A_row_start + (j * A_col_inc + A_col_start) * A_internal_rows] =
- B[i * B_row_inc + B_row_start + (j * B_col_inc + B_col_start) * B_internal_rows];
-}
-
diff --git a/auxiliary/matrix_col/align1/clear.cl b/auxiliary/matrix_col/align1/clear.cl
deleted file mode 100644
index 90f51e4..0000000
--- a/auxiliary/matrix_col/align1/clear.cl
+++ /dev/null
@@ -1,16 +0,0 @@
-
-__kernel void clear( // A <- 0
- __global float * A,
- unsigned int A_row_start,
- unsigned int A_col_start,
- unsigned int A_row_inc,
- unsigned int A_col_inc,
- unsigned int A_row_size,
- unsigned int A_col_size,
- unsigned int A_internal_rows,
- unsigned int A_internal_cols)
-{
- for (unsigned int i = get_global_id(0); i < A_row_size; i += get_global_size(0))
- for (unsigned int j = get_global_id(1); j < A_col_size; j += get_global_size(1))
- A[i * A_row_inc + A_row_start + (j * A_col_inc + A_col_start) * A_internal_rows] = 0;
-}
diff --git a/auxiliary/matrix_col/align1/cpu_inplace_mult.cl b/auxiliary/matrix_col/align1/cpu_inplace_mult.cl
deleted file mode 100644
index f259062..0000000
--- a/auxiliary/matrix_col/align1/cpu_inplace_mult.cl
+++ /dev/null
@@ -1,18 +0,0 @@
-
-__kernel void cpu_inplace_mult( // A *= const
- __global float * A,
- unsigned int A_row_start,
- unsigned int A_col_start,
- unsigned int A_row_inc,
- unsigned int A_col_inc,
- unsigned int A_row_size,
- unsigned int A_col_size,
- unsigned int A_internal_rows,
- unsigned int A_internal_cols,
- float factor)
-{
- for (unsigned int i = get_global_id(0); i < A_row_size; i += get_global_size(0))
- for (unsigned int j = get_global_id(1); j < A_col_size; j += get_global_size(1))
- A[i * A_row_inc + A_row_start + (j * A_col_inc + A_col_start) * A_internal_rows] *= factor;
-}
-
diff --git a/auxiliary/matrix_col/align1/inplace_add.cl b/auxiliary/matrix_col/align1/inplace_add.cl
deleted file mode 100644
index d4a3933..0000000
--- a/auxiliary/matrix_col/align1/inplace_add.cl
+++ /dev/null
@@ -1,26 +0,0 @@
-
-__kernel void inplace_add( // A += B
- __global float * A,
- unsigned int A_row_start,
- unsigned int A_col_start,
- unsigned int A_row_inc,
- unsigned int A_col_inc,
- unsigned int A_row_size,
- unsigned int A_col_size,
- unsigned int A_internal_rows,
- unsigned int A_internal_cols,
- __global const float * B,
- unsigned int B_row_start,
- unsigned int B_col_start,
- unsigned int B_row_inc,
- unsigned int B_col_inc,
- unsigned int B_row_size,
- unsigned int B_col_size,
- unsigned int B_internal_rows,
- unsigned int B_internal_cols)
-{
- for (unsigned int i = get_global_id(0); i < A_row_size; i += get_global_size(0))
- for (unsigned int j = get_global_id(1); j < A_col_size; j += get_global_size(1))
- A[i * A_row_inc + A_row_start + (j * A_col_inc + A_col_start) * A_internal_rows] +=
- B[i * B_row_inc + B_row_start + (j * B_col_inc + B_col_start) * B_internal_rows];
-}
diff --git a/auxiliary/matrix_col/align1/inplace_divide.cl b/auxiliary/matrix_col/align1/inplace_divide.cl
deleted file mode 100644
index 452d487..0000000
--- a/auxiliary/matrix_col/align1/inplace_divide.cl
+++ /dev/null
@@ -1,19 +0,0 @@
-
-__kernel void inplace_divide( // A /= const
- __global float * A,
- unsigned int A_row_start,
- unsigned int A_col_start,
- unsigned int A_row_inc,
- unsigned int A_col_inc,
- unsigned int A_row_size,
- unsigned int A_col_size,
- unsigned int A_internal_rows,
- unsigned int A_internal_cols,
- __global const float * fac) //note: CPU variant is mapped to prod_scalar
-{
- float factor = *fac;
- for (unsigned int i = get_global_id(0); i < A_row_size; i += get_global_size(0))
- for (unsigned int j = get_global_id(1); j < A_col_size; j += get_global_size(1))
- A[i * A_row_inc + A_row_start + (j * A_col_inc + A_col_start) * A_internal_rows] /= factor;
-}
-
diff --git a/auxiliary/matrix_col/align1/inplace_mult.cl b/auxiliary/matrix_col/align1/inplace_mult.cl
deleted file mode 100644
index 04555aa..0000000
--- a/auxiliary/matrix_col/align1/inplace_mult.cl
+++ /dev/null
@@ -1,20 +0,0 @@
-
-__kernel void inplace_mult( // A *= const
- __global float * A,
- unsigned int A_row_start,
- unsigned int A_col_start,
- unsigned int A_row_inc,
- unsigned int A_col_inc,
- unsigned int A_row_size,
- unsigned int A_col_size,
- unsigned int A_internal_rows,
- unsigned int A_internal_cols,
- __global const float * fac)
-{
- float factor = *fac;
- for (unsigned int i = get_global_id(0); i < A_row_size; i += get_global_size(0))
- for (unsigned int j = get_global_id(1); j < A_col_size; j += get_global_size(1))
- A[i * A_row_inc + A_row_start + (j * A_col_inc + A_col_start) * A_internal_rows] *= factor;
-}
-
-
diff --git a/auxiliary/matrix_col/align1/inplace_sub.cl b/auxiliary/matrix_col/align1/inplace_sub.cl
deleted file mode 100644
index 5f02bcb..0000000
--- a/auxiliary/matrix_col/align1/inplace_sub.cl
+++ /dev/null
@@ -1,27 +0,0 @@
-
-__kernel void inplace_sub( // A -= B
- __global float * A,
- unsigned int A_row_start,
- unsigned int A_col_start,
- unsigned int A_row_inc,
- unsigned int A_col_inc,
- unsigned int A_row_size,
- unsigned int A_col_size,
- unsigned int A_internal_rows,
- unsigned int A_internal_cols,
- __global const float * B,
- unsigned int B_row_start,
- unsigned int B_col_start,
- unsigned int B_row_inc,
- unsigned int B_col_inc,
- unsigned int B_row_size,
- unsigned int B_col_size,
- unsigned int B_internal_rows,
- unsigned int B_internal_cols)
-{
- for (unsigned int i = get_global_id(0); i < A_row_size; i += get_global_size(0))
- for (unsigned int j = get_global_id(1); j < A_col_size; j += get_global_size(1))
- A[i * A_row_inc + A_row_start + (j * A_col_inc + A_col_start) * A_internal_rows] -=
- B[i * B_row_inc + B_row_start + (j * B_col_inc + B_col_start) * B_internal_rows];
-}
-
diff --git a/auxiliary/matrix_col/align1/sub.cl b/auxiliary/matrix_col/align1/sub.cl
deleted file mode 100644
index 7279ad7..0000000
--- a/auxiliary/matrix_col/align1/sub.cl
+++ /dev/null
@@ -1,36 +0,0 @@
-
-__kernel void sub( // C = A - B
- __global const float * A,
- unsigned int A_row_start,
- unsigned int A_col_start,
- unsigned int A_row_inc,
- unsigned int A_col_inc,
- unsigned int A_row_size,
- unsigned int A_col_size,
- unsigned int A_internal_rows,
- unsigned int A_internal_cols,
- __global const float * B,
- unsigned int B_row_start,
- unsigned int B_col_start,
- unsigned int B_row_inc,
- unsigned int B_col_inc,
- unsigned int B_row_size,
- unsigned int B_col_size,
- unsigned int B_internal_rows,
- unsigned int B_internal_cols,
- __global float * C,
- unsigned int C_row_start,
- unsigned int C_col_start,
- unsigned int C_row_inc,
- unsigned int C_col_inc,
- unsigned int C_row_size,
- unsigned int C_col_size,
- unsigned int C_internal_rows,
- unsigned int C_internal_cols)
-{
- for (unsigned int i = get_global_id(0); i < A_row_size; i += get_global_size(0))
- for (unsigned int j = get_global_id(1); j < A_col_size; j += get_global_size(1))
- C[i * C_row_inc + C_row_start + (j * C_col_inc + C_col_start) * C_internal_rows] =
- A[i * A_row_inc + A_row_start + (j * A_col_inc + A_col_start) * A_internal_rows]
- - B[i * B_row_inc + B_row_start + (j * B_col_inc + B_col_start) * B_internal_rows];
-}
diff --git a/auxiliary/matrix_col/align1/trans_vec_mul.cl b/auxiliary/matrix_col/align1/trans_vec_mul.cl
deleted file mode 100644
index 17fbef8..0000000
--- a/auxiliary/matrix_col/align1/trans_vec_mul.cl
+++ /dev/null
@@ -1,28 +0,0 @@
-
-__kernel void trans_vec_mul(
- __global const float * A,
- unsigned int A_row_start,
- unsigned int A_col_start,
- unsigned int A_row_inc,
- unsigned int A_col_inc,
- unsigned int A_row_size,
- unsigned int A_col_size,
- unsigned int A_internal_rows,
- unsigned int A_internal_cols,
- __global const float * v,
- unsigned int v_start,
- unsigned int v_inc,
- unsigned int v_size,
- __global float * result,
- unsigned int result_start,
- unsigned int result_inc,
- unsigned int result_size)
-{
- for (unsigned int row = get_global_id(0); row < A_col_size; row += get_global_size(0))
- {
- float dot_prod = 0;
- for (unsigned int col = 0; col < A_row_size; ++col)
- dot_prod += A[(row * A_col_inc + A_col_start) * A_internal_rows + col * A_row_inc + A_row_start] * v[v_start + col * v_inc];
- result[row * result_inc + result_start] = dot_prod;
- }
-}
diff --git a/auxiliary/matrix_col/align1/vec_mul.cl b/auxiliary/matrix_col/align1/vec_mul.cl
deleted file mode 100644
index ea722c7..0000000
--- a/auxiliary/matrix_col/align1/vec_mul.cl
+++ /dev/null
@@ -1,28 +0,0 @@
-
-__kernel void vec_mul(
- __global const float * A,
- unsigned int A_row_start,
- unsigned int A_col_start,
- unsigned int A_row_inc,
- unsigned int A_col_inc,
- unsigned int A_row_size,
- unsigned int A_col_size,
- unsigned int A_internal_rows,
- unsigned int A_internal_cols,
- __global const float * v,
- unsigned int v_start,
- unsigned int v_inc,
- unsigned int v_size,
- __global float * result,
- unsigned int result_start,
- unsigned int result_inc,
- unsigned int result_size)
-{
- for (unsigned int row = get_global_id(0); row < A_row_size; row += get_global_size(0))
- {
- float dot_prod = 0;
- for (unsigned int col = 0; col < A_col_size; ++col)
- dot_prod += A[(row * A_row_inc + A_row_start) + (col * A_col_inc + A_col_start) * A_internal_rows] * v[v_start + v_inc * col];
- result[row * result_inc + result_start] = dot_prod;
- }
-}
diff --git a/auxiliary/matrix_row/align1/add.cl b/auxiliary/matrix_row/align1/add.cl
deleted file mode 100644
index ada7283..0000000
--- a/auxiliary/matrix_row/align1/add.cl
+++ /dev/null
@@ -1,37 +0,0 @@
-
-__kernel void add( // C = A + B
- __global const float * A,
- unsigned int A_row_start,
- unsigned int A_col_start,
- unsigned int A_row_inc,
- unsigned int A_col_inc,
- unsigned int A_row_size,
- unsigned int A_col_size,
- unsigned int A_internal_rows,
- unsigned int A_internal_cols,
- __global const float * B,
- unsigned int B_row_start,
- unsigned int B_col_start,
- unsigned int B_row_inc,
- unsigned int B_col_inc,
- unsigned int B_row_size,
- unsigned int B_col_size,
- unsigned int B_internal_rows,
- unsigned int B_internal_cols,
- __global float * C,
- unsigned int C_row_start,
- unsigned int C_col_start,
- unsigned int C_row_inc,
- unsigned int C_col_inc,
- unsigned int C_row_size,
- unsigned int C_col_size,
- unsigned int C_internal_rows,
- unsigned int C_internal_cols)
-{
- for (unsigned int i = get_global_id(0); i < A_row_size; i += get_global_size(0))
- for (unsigned int j = get_global_id(1); j < A_col_size; j += get_global_size(1))
- C[(i * C_row_inc + C_row_start) * C_internal_cols + j * C_col_inc + C_col_start] =
- A[(i * A_row_inc + A_row_start) * A_internal_cols + j * A_col_inc + A_col_start]
- + B[(i * B_row_inc + B_row_start) * B_internal_cols + j * B_col_inc + B_col_start];
-}
-
diff --git a/auxiliary/matrix_row/align1/assign.cl b/auxiliary/matrix_row/align1/assign.cl
deleted file mode 100644
index af993e8..0000000
--- a/auxiliary/matrix_row/align1/assign.cl
+++ /dev/null
@@ -1,27 +0,0 @@
-
-__kernel void assign( // A <- B
- __global float * A,
- unsigned int A_row_start,
- unsigned int A_col_start,
- unsigned int A_row_inc,
- unsigned int A_col_inc,
- unsigned int A_row_size,
- unsigned int A_col_size,
- unsigned int A_internal_rows,
- unsigned int A_internal_cols,
- __global const float * B,
- unsigned int B_row_start,
- unsigned int B_col_start,
- unsigned int B_row_inc,
- unsigned int B_col_inc,
- unsigned int B_row_size,
- unsigned int B_col_size,
- unsigned int B_internal_rows,
- unsigned int B_internal_cols)
-{
- for (unsigned int i = get_global_id(0); i < A_row_size; i += get_global_size(0))
- for (unsigned int j = get_global_id(1); j < A_col_size; j += get_global_size(1))
- A[(i * A_row_inc + A_row_start) * A_internal_cols + j * A_col_inc + A_col_start]
- = B[(i * B_row_inc + B_row_start) * B_internal_cols + j * B_col_inc + B_col_start];
-}
-
diff --git a/auxiliary/matrix_row/align1/clear.cl b/auxiliary/matrix_row/align1/clear.cl
deleted file mode 100644
index ffe4802..0000000
--- a/auxiliary/matrix_row/align1/clear.cl
+++ /dev/null
@@ -1,16 +0,0 @@
-
-__kernel void clear( // A <- 0
- __global float * A,
- unsigned int A_row_start,
- unsigned int A_col_start,
- unsigned int A_row_inc,
- unsigned int A_col_inc,
- unsigned int A_row_size,
- unsigned int A_col_size,
- unsigned int A_internal_rows,
- unsigned int A_internal_cols)
-{
- for (unsigned int i = get_global_id(0); i < A_row_size; i += get_global_size(0))
- for (unsigned int j = get_global_id(1); j < A_col_size; j += get_global_size(1))
- A[(i * A_row_inc + A_row_start) * A_internal_cols + j * A_col_inc + A_col_start] = 0;
-}
diff --git a/auxiliary/matrix_row/align1/cpu_inplace_mult.cl b/auxiliary/matrix_row/align1/cpu_inplace_mult.cl
deleted file mode 100644
index 721e6e1..0000000
--- a/auxiliary/matrix_row/align1/cpu_inplace_mult.cl
+++ /dev/null
@@ -1,17 +0,0 @@
-
-__kernel void cpu_inplace_mult( // A *= const
- __global float * A,
- unsigned int A_row_start,
- unsigned int A_col_start,
- unsigned int A_row_inc,
- unsigned int A_col_inc,
- unsigned int A_row_size,
- unsigned int A_col_size,
- unsigned int A_internal_rows,
- unsigned int A_internal_cols,
- float factor)
-{
- for (unsigned int i = get_global_id(0); i < A_row_size; i += get_global_size(0))
- for (unsigned int j = get_global_id(1); j < A_col_size; j += get_global_size(1))
- A[(i * A_row_inc + A_row_start) * A_internal_cols + j * A_col_inc + A_col_start] *= factor;
-}
diff --git a/auxiliary/matrix_row/align1/inplace_add.cl b/auxiliary/matrix_row/align1/inplace_add.cl
deleted file mode 100644
index 01269b6..0000000
--- a/auxiliary/matrix_row/align1/inplace_add.cl
+++ /dev/null
@@ -1,27 +0,0 @@
-
-__kernel void inplace_add( // A += B
- __global float * A,
- unsigned int A_row_start,
- unsigned int A_col_start,
- unsigned int A_row_inc,
- unsigned int A_col_inc,
- unsigned int A_row_size,
- unsigned int A_col_size,
- unsigned int A_internal_rows,
- unsigned int A_internal_cols,
- __global const float * B,
- unsigned int B_row_start,
- unsigned int B_col_start,
- unsigned int B_row_inc,
- unsigned int B_col_inc,
- unsigned int B_row_size,
- unsigned int B_col_size,
- unsigned int B_internal_rows,
- unsigned int B_internal_cols)
-{
- for (unsigned int i = get_global_id(0); i < A_row_size; i += get_global_size(0))
- for (unsigned int j = get_global_id(1); j < A_col_size; j += get_global_size(1))
- A[(i * A_row_inc + A_row_start) * A_internal_cols + j * A_col_inc + A_col_start] +=
- B[(i * B_row_inc + B_row_start) * B_internal_cols + j * B_col_inc + B_col_start];
-}
-
diff --git a/auxiliary/matrix_row/align1/inplace_divide.cl b/auxiliary/matrix_row/align1/inplace_divide.cl
deleted file mode 100644
index cff4de0..0000000
--- a/auxiliary/matrix_row/align1/inplace_divide.cl
+++ /dev/null
@@ -1,18 +0,0 @@
-
-__kernel void inplace_divide( // A /= const
- __global float * A,
- unsigned int A_row_start,
- unsigned int A_col_start,
- unsigned int A_row_inc,
- unsigned int A_col_inc,
- unsigned int A_row_size,
- unsigned int A_col_size,
- unsigned int A_internal_rows,
- unsigned int A_internal_cols,
- __global const float * fac) //note: CPU variant is mapped to prod_scalar
-{
- float factor = *fac;
- for (unsigned int i = get_global_id(0); i < A_row_size; i += get_global_size(0))
- for (unsigned int j = get_global_id(1); j < A_col_size; j += get_global_size(1))
- A[(i * A_row_inc + A_row_start) * A_internal_cols + j * A_col_inc + A_col_start] /= factor;
-}
diff --git a/auxiliary/matrix_row/align1/inplace_mult.cl b/auxiliary/matrix_row/align1/inplace_mult.cl
deleted file mode 100644
index 7758b51..0000000
--- a/auxiliary/matrix_row/align1/inplace_mult.cl
+++ /dev/null
@@ -1,20 +0,0 @@
-
-__kernel void inplace_mult( // A *= const
- __global float * A,
- unsigned int A_row_start,
- unsigned int A_col_start,
- unsigned int A_row_inc,
- unsigned int A_col_inc,
- unsigned int A_row_size,
- unsigned int A_col_size,
- unsigned int A_internal_rows,
- unsigned int A_internal_cols,
- __global const float * fac)
-{
- float factor = *fac;
- for (unsigned int i = get_global_id(0); i < A_row_size; i += get_global_size(0))
- for (unsigned int j = get_global_id(1); j < A_col_size; j += get_global_size(1))
- A[(i * A_row_inc + A_row_start) * A_internal_cols + j * A_col_inc + A_col_start] *= factor;
-}
-
-
diff --git a/auxiliary/matrix_row/align1/inplace_sub.cl b/auxiliary/matrix_row/align1/inplace_sub.cl
deleted file mode 100644
index acc10ff..0000000
--- a/auxiliary/matrix_row/align1/inplace_sub.cl
+++ /dev/null
@@ -1,26 +0,0 @@
-
-__kernel void inplace_sub( // A -= B
- __global float * A,
- unsigned int A_row_start,
- unsigned int A_col_start,
- unsigned int A_row_inc,
- unsigned int A_col_inc,
- unsigned int A_row_size,
- unsigned int A_col_size,
- unsigned int A_internal_rows,
- unsigned int A_internal_cols,
- __global const float * B,
- unsigned int B_row_start,
- unsigned int B_col_start,
- unsigned int B_row_inc,
- unsigned int B_col_inc,
- unsigned int B_row_size,
- unsigned int B_col_size,
- unsigned int B_internal_rows,
- unsigned int B_internal_cols)
-{
- for (unsigned int i = get_global_id(0); i < A_row_size; i += get_global_size(0))
- for (unsigned int j = get_global_id(1); j < A_col_size; j += get_global_size(1))
- A[(i * A_row_inc + A_row_start) * A_internal_cols + j * A_col_inc + A_col_start] -=
- B[(i * B_row_inc + B_row_start) * B_internal_cols + j * B_col_inc + B_col_start];
-}
diff --git a/auxiliary/matrix_row/align1/sub.cl b/auxiliary/matrix_row/align1/sub.cl
deleted file mode 100644
index 8fd4993..0000000
--- a/auxiliary/matrix_row/align1/sub.cl
+++ /dev/null
@@ -1,36 +0,0 @@
-
-__kernel void sub( // C = A - B
- __global const float * A,
- unsigned int A_row_start,
- unsigned int A_col_start,
- unsigned int A_row_inc,
- unsigned int A_col_inc,
- unsigned int A_row_size,
- unsigned int A_col_size,
- unsigned int A_internal_rows,
- unsigned int A_internal_cols,
- __global const float * B,
- unsigned int B_row_start,
- unsigned int B_col_start,
- unsigned int B_row_inc,
- unsigned int B_col_inc,
- unsigned int B_row_size,
- unsigned int B_col_size,
- unsigned int B_internal_rows,
- unsigned int B_internal_cols,
- __global float * C,
- unsigned int C_row_start,
- unsigned int C_col_start,
- unsigned int C_row_inc,
- unsigned int C_col_inc,
- unsigned int C_row_size,
- unsigned int C_col_size,
- unsigned int C_internal_rows,
- unsigned int C_internal_cols)
-{
- for (unsigned int i = get_global_id(0); i < A_row_size; i += get_global_size(0))
- for (unsigned int j = get_global_id(1); j < A_col_size; j += get_global_size(1))
- C[(i * C_row_inc + C_row_start) * C_internal_cols + j * C_col_inc + C_col_start] =
- A[(i * A_row_inc + A_row_start) * A_internal_cols + j * A_col_inc + A_col_start]
- - B[(i * B_row_inc + B_row_start) * B_internal_cols + j * B_col_inc + B_col_start];
-}
diff --git a/auxiliary/matrix_row/align1/trans_vec_mul.cl b/auxiliary/matrix_row/align1/trans_vec_mul.cl
deleted file mode 100644
index c02f621..0000000
--- a/auxiliary/matrix_row/align1/trans_vec_mul.cl
+++ /dev/null
@@ -1,29 +0,0 @@
-
-__kernel void trans_vec_mul(
- __global const float * A,
- unsigned int A_row_start,
- unsigned int A_col_start,
- unsigned int A_row_inc,
- unsigned int A_col_inc,
- unsigned int A_row_size,
- unsigned int A_col_size,
- unsigned int A_internal_rows,
- unsigned int A_internal_cols,
- __global const float * v,
- unsigned int v_start,
- unsigned int v_inc,
- unsigned int v_size,
- __global float * result,
- unsigned int result_start,
- unsigned int result_inc,
- unsigned int result_size)
-{
- for (unsigned int row = get_global_id(0); row < A_col_size; row += get_global_size(0))
- {
- float dot_prod = 0;
- for (unsigned int col = 0; col < A_row_size; ++col)
- dot_prod += A[(row * A_col_inc + A_col_start) + (col * A_row_inc + A_row_start) * A_internal_cols] * v[v_start + v_inc * col];
- result[row * result_inc + result_start] = dot_prod;
- }
-}
-
diff --git a/auxiliary/matrix_row/align1/vec_mul.cl b/auxiliary/matrix_row/align1/vec_mul.cl
deleted file mode 100644
index fab3d36..0000000
--- a/auxiliary/matrix_row/align1/vec_mul.cl
+++ /dev/null
@@ -1,30 +0,0 @@
-
-__kernel void vec_mul(
- __global const float * A,
- unsigned int A_row_start,
- unsigned int A_col_start,
- unsigned int A_row_inc,
- unsigned int A_col_inc,
- unsigned int A_row_size,
- unsigned int A_col_size,
- unsigned int A_internal_rows,
- unsigned int A_internal_cols,
- __global const float * v,
- unsigned int v_start,
- unsigned int v_inc,
- unsigned int v_size,
- __global float * result,
- unsigned int result_start,
- unsigned int result_inc,
- unsigned int result_size)
-{
- for (unsigned int row = get_global_id(0); row < A_row_size; row += get_global_size(0))
- {
- float dot_prod = 0;
- for (unsigned int col = 0; col < A_col_size; ++col)
- dot_prod += A[(row * A_row_inc + A_row_start) * A_internal_cols + col * A_col_inc + A_col_start] * v[v_start + v_inc * col];
- result[row * result_inc + result_start] = dot_prod;
- }
-}
-
-
diff --git a/auxiliary/nmf/align1/el_wise_mul_div.cl b/auxiliary/nmf/align1/el_wise_mul_div.cl
deleted file mode 100644
index d62ec9d..0000000
--- a/auxiliary/nmf/align1/el_wise_mul_div.cl
+++ /dev/null
@@ -1,14 +0,0 @@
-
-__kernel void el_wise_mul_div(
- __global float * matrix1,
- __global const float * matrix2,
- __global const float * matrix3,
- unsigned int size)
-{
- for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))
- {
- float val = matrix1[i] * matrix2[i];
- float divisor = matrix3[i];
- matrix1[i] = (divisor > 0.00001) ? (val / divisor) : 0;
- };
-};
diff --git a/auxiliary/nmf/align1/el_wise_mul_div.cl~ b/auxiliary/nmf/align1/el_wise_mul_div.cl~
deleted file mode 100644
index 79a5405..0000000
--- a/auxiliary/nmf/align1/el_wise_mul_div.cl~
+++ /dev/null
@@ -1,13 +0,0 @@
-
-__kernel void el_wise_mul_div(
- __global float * matrix1,
- __global const float * matrix2,
- __global const float * matrix3,
- unsigned int size)
-{
- for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) {
- float val = matrix1[i] * matrix2[i];
- float divisor = matrix3[i];
- matrix1[i] = (divisor > 0.00001) ? (val / divisor) : 0;
- };
-};
diff --git a/auxiliary/nmf/align1/sub_wise.cl b/auxiliary/nmf/align1/sub_wise.cl
deleted file mode 100644
index b1f2b42..0000000
--- a/auxiliary/nmf/align1/sub_wise.cl
+++ /dev/null
@@ -1,10 +0,0 @@
-
-__kernel void sub_wise(
- __global const float * matrix1,
- __global const float * matrix2,
- __global float * result,
- unsigned int size)
-{
- for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))
- result[i] = matrix1[i] - matrix2[i];
-}
diff --git a/auxiliary/nmf/align1/sub_wise.cl~ b/auxiliary/nmf/align1/sub_wise.cl~
deleted file mode 100644
index 79a5405..0000000
--- a/auxiliary/nmf/align1/sub_wise.cl~
+++ /dev/null
@@ -1,13 +0,0 @@
-
-__kernel void el_wise_mul_div(
- __global float * matrix1,
- __global const float * matrix2,
- __global const float * matrix3,
- unsigned int size)
-{
- for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) {
- float val = matrix1[i] * matrix2[i];
- float divisor = matrix3[i];
- matrix1[i] = (divisor > 0.00001) ? (val / divisor) : 0;
- };
-};
diff --git a/auxiliary/svd/align1/bidiag_pack.cl b/auxiliary/svd/align1/bidiag_pack.cl
deleted file mode 100644
index 6baa8ed..0000000
--- a/auxiliary/svd/align1/bidiag_pack.cl
+++ /dev/null
@@ -1,19 +0,0 @@
-
-
-__kernel void bidiag_pack(__global float* A,
- __global float* D,
- __global float* S,
- uint size1,
- uint size2,
- uint stride
- ) {
- uint size = min(size1, size2);
-
- if(get_global_id(0) == 0)
- S[0] = 0.0f;
-
- for(uint i = get_global_id(0); i < size ; i += get_global_size(0)) {
- D[i] = A[i*stride + i];
- S[i + 1] = (i + 1 < size2)?A[i*stride + (i + 1)]:0.0f;
- }
-}
diff --git a/auxiliary/svd/align1/copy_col.cl b/auxiliary/svd/align1/copy_col.cl
deleted file mode 100644
index 6381d76..0000000
--- a/auxiliary/svd/align1/copy_col.cl
+++ /dev/null
@@ -1,17 +0,0 @@
-
-
-// probably, this is a ugly way
-__kernel void copy_col(__global float* A,
- __global float* V,
- uint row_start,
- uint col_start,
- uint size,
- uint stride
- ) {
- uint glb_id = get_global_id(0);
- uint glb_sz = get_global_size(0);
-
- for(uint i = row_start + glb_id; i < size; i += glb_sz) {
- V[i - row_start] = A[i * stride + col_start];
- }
-}
diff --git a/auxiliary/svd/align1/copy_row.cl b/auxiliary/svd/align1/copy_row.cl
deleted file mode 100644
index 0d7303a..0000000
--- a/auxiliary/svd/align1/copy_row.cl
+++ /dev/null
@@ -1,17 +0,0 @@
-
-
-// probably, this is too
-__kernel void copy_row(__global float* A,
- __global float* V,
- uint row_start,
- uint col_start,
- uint size,
- uint stride
- ) {
- uint glb_id = get_global_id(0);
- uint glb_sz = get_global_size(0);
-
- for(uint i = col_start + glb_id; i < size; i += glb_sz) {
- V[i - col_start] = A[row_start * stride + i];
- }
-}
diff --git a/auxiliary/svd/align1/givens_prev.cl b/auxiliary/svd/align1/givens_prev.cl
deleted file mode 100644
index 8e62007..0000000
--- a/auxiliary/svd/align1/givens_prev.cl
+++ /dev/null
@@ -1,59 +0,0 @@
-
-
-__kernel void givens_prev(__global float* matr,
- __global float* cs,
- __global float* ss,
- uint size,
- uint stride,
- uint start_i,
- uint end_i
- )
-{
- uint glb_id = get_global_id(0);
- uint glb_sz = get_global_size(0);
-
- uint lcl_id = get_local_id(0);
- uint lcl_sz = get_local_size(0);
-
- uint j = glb_id;
-
- __local float cs_lcl[256];
- __local float ss_lcl[256];
-
- float x = (j < size)?matr[(start_i - 1) * stride + j]:0;
-
- uint elems_num = end_i - start_i;
- uint block_num = (elems_num + lcl_sz - 1) / lcl_sz;
-
- for(uint block_id = 0; block_id < block_num; block_id++)
- {
- uint to = min(elems_num - block_id * lcl_sz, lcl_sz);
-
- if(lcl_id < to)
- {
- cs_lcl[lcl_id] = cs[lcl_id + start_i + block_id * lcl_sz];
- ss_lcl[lcl_id] = ss[lcl_id + start_i + block_id * lcl_sz];
- }
-
- barrier(CLK_LOCAL_MEM_FENCE);
-
- if(j < size)
- {
- for(uint ind = 0; ind < to; ind++)
- {
- uint i = ind + start_i + block_id * lcl_sz;
-
- float z = matr[i * stride + j];
-
- float cs_val = cs_lcl[ind];//cs[i];
- float ss_val = ss_lcl[ind];//ss[i];
-
- matr[(i - 1) * stride + j] = x * cs_val + z * ss_val;
- x = -x * ss_val + z * cs_val;
- }
- }
- barrier(CLK_LOCAL_MEM_FENCE);
- }
- if(j < size)
- matr[(end_i - 1) * stride + j] = x;
-}
diff --git a/auxiliary/svd/align1/house_col.cl b/auxiliary/svd/align1/house_col.cl
deleted file mode 100644
index 1a4861a..0000000
--- a/auxiliary/svd/align1/house_col.cl
+++ /dev/null
@@ -1,59 +0,0 @@
-
-// calculates a sum of local array elements
-void col_reduce_lcl_array(__local float* sums, uint lcl_id, uint lcl_sz) {
- uint step = lcl_sz >> 1;
-
- while(step > 0) {
- if(lcl_id < step) {
- sums[lcl_id] += sums[lcl_id + step];
- }
- step >>= 1;
- barrier(CLK_LOCAL_MEM_FENCE);
- }
-}
-
-__kernel void house_col(__global float* A,
- __global float* QL,
- __constant float* V, //householder vector
- uint row_start,
- uint col_start,
- uint size1,
- uint size2,
- uint stride,
- uint strideQ,
- __local float* sums
- ) {
- uint glb_id = get_global_id(0);
- uint glb_sz = get_global_size(0);
-
- uint grp_id = get_group_id(0);
- uint grp_nm = get_num_groups(0);
-
- uint lcl_id = get_local_id(0);
- uint lcl_sz = get_local_size(0);
-
- float ss = 0.0f;
- // update of left matrix
- for(uint i = grp_id; i < size1; i += grp_nm) {
- ss = 0.0f;
- for(uint j = lcl_id; j < size1; j += lcl_sz) ss = ss + (V[j] * QL[i * strideQ + j]);
- sums[lcl_id] = ss;
-
- barrier(CLK_LOCAL_MEM_FENCE);
- col_reduce_lcl_array(sums, lcl_id, lcl_sz);
- barrier(CLK_LOCAL_MEM_FENCE);
-
- float sum_Qv = sums[0];
-
- for(uint j = lcl_id; j < size1; j += lcl_sz)
- QL[i * strideQ + j] = QL[i * strideQ + j] - (2 * V[j] * sum_Qv);
- }
- // doing it in slightly different way to avoid cache misses
- for(uint i = glb_id + col_start; i < size2; i += glb_sz) {
- ss = 0.0f;
- for(uint j = row_start; j < size1; j++) ss = ss + (V[j] * A[j * stride + i]);
-
- for(uint j = row_start; j < size1; j++)
- A[j * stride + i] = A[j * stride + i] - (2 * V[j] * ss);
- }
-}
diff --git a/auxiliary/svd/align1/house_row.cl b/auxiliary/svd/align1/house_row.cl
deleted file mode 100644
index a37b2fe..0000000
--- a/auxiliary/svd/align1/house_row.cl
+++ /dev/null
@@ -1,71 +0,0 @@
-// calculates a sum of local array elements
-void row_reduce_lcl_array(__local float* sums, uint lcl_id, uint lcl_sz) {
- uint step = lcl_sz >> 1;
-
- while(step > 0) {
- if(lcl_id < step) {
- sums[lcl_id] += sums[lcl_id + step];
- }
- step >>= 1;
- barrier(CLK_LOCAL_MEM_FENCE);
- }
-}
-
-
-__kernel void house_row(__global float* A,
- __global float* QR,
- __global float* V, // householder vector
- uint row_start,
- uint col_start,
- uint size1,
- uint size2,
- uint stride,
- uint strideQ,
- __local float* sums
- ) {
-
- uint glb_id = get_global_id(0);
-
- uint grp_id = get_group_id(0);
- uint grp_nm = get_num_groups(0);
-
- uint lcl_id = get_local_id(0);
- uint lcl_sz = get_local_size(0);
-
- float ss = 0.0f;
-
- // update of QR matrix
- // Actually, we are calculating a transpose of right matrix. This allows to avoid cache
- // misses.
- for(uint i = grp_id; i < size2; i += grp_nm) {
- ss = 0.0f;
- for(uint j = lcl_id; j < size2; j += lcl_sz) ss = ss + (V[j] * QR[i * strideQ + j]);
- sums[lcl_id] = ss;
-
- barrier(CLK_LOCAL_MEM_FENCE);
- row_reduce_lcl_array(sums, lcl_id, lcl_sz);
- barrier(CLK_LOCAL_MEM_FENCE);
-
- float sum_Qv = sums[0];
- for(uint j = lcl_id; j < size2; j += lcl_sz)
- QR[i * strideQ + j] = QR[i * strideQ + j] - (2 * V[j] * sum_Qv);
- }
-
- // update of A matrix
- for(uint i = grp_id + row_start; i < size1; i += grp_nm) {
- ss = 0.0f;
-
- for(uint j = lcl_id; j < size2; j += lcl_sz) ss = ss + (V[j] * A[i * stride + j]);
- sums[lcl_id] = ss;
-
- barrier(CLK_LOCAL_MEM_FENCE);
- row_reduce_lcl_array(sums, lcl_id, lcl_sz);
- barrier(CLK_LOCAL_MEM_FENCE);
-
- float sum_Av = sums[0];
-
- for(uint j = lcl_id; j < size2; j += lcl_sz)
- A[i * stride + j] = A[i * stride + j] - (2 * V[j] * sum_Av);
- }
-}
-
diff --git a/auxiliary/svd/align1/inverse_signs.cl b/auxiliary/svd/align1/inverse_signs.cl
deleted file mode 100644
index 223539f..0000000
--- a/auxiliary/svd/align1/inverse_signs.cl
+++ /dev/null
@@ -1,16 +0,0 @@
-
-
-
-__kernel void inverse_signs(__global float* v,
- __global float* signs,
- uint size,
- uint stride
- )
-{
- uint glb_id_x = get_global_id(0);
- uint glb_id_y = get_global_id(1);
-
- if((glb_id_x < size) && (glb_id_y < size))
- v[glb_id_x * stride + glb_id_y] *= signs[glb_id_x];
-}
-
diff --git a/auxiliary/svd/align1/transpose_inplace.cl b/auxiliary/svd/align1/transpose_inplace.cl
deleted file mode 100644
index 521152b..0000000
--- a/auxiliary/svd/align1/transpose_inplace.cl
+++ /dev/null
@@ -1,25 +0,0 @@
-
-
-
-__kernel void transpose_inplace(__global float* input,
- unsigned int row_num,
- unsigned int col_num) {
- unsigned int size = row_num * col_num;
- for(unsigned int i = get_global_id(0); i < size; i+= get_global_size(0)) {
- unsigned int row = i / col_num;
- unsigned int col = i - row*col_num;
-
- unsigned int new_pos = col * row_num + row;
-
- //new_pos = col < row?0:1;
- //input[i] = new_pos;
-
- if(i < new_pos) {
- float val = input[i];
- input[i] = input[new_pos];
- input[new_pos] = val;
- }
- }
-}
-
-
diff --git a/auxiliary/vector/align1/add.cl b/auxiliary/vector/align1/add.cl
deleted file mode 100644
index 591cb54..0000000
--- a/auxiliary/vector/align1/add.cl
+++ /dev/null
@@ -1,19 +0,0 @@
-
-__kernel void add(
- __global const float * vec1,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global const float * vec2,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2,
- __global float * result,
- unsigned int start3,
- unsigned int inc3,
- unsigned int size3)
-{
- for (unsigned int i = get_global_id(0); i < size1; i += get_global_size(0))
- result[i*inc3+start3] = vec1[i*inc1+start1] + vec2[i*inc2+start2];
-}
-
diff --git a/auxiliary/vector/align1/assign.cl b/auxiliary/vector/align1/assign.cl
deleted file mode 100644
index 05dba1a..0000000
--- a/auxiliary/vector/align1/assign.cl
+++ /dev/null
@@ -1,15 +0,0 @@
-
-__kernel void assign(
- __global float * vec1,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global const float * vec2,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2)
-{
- for (unsigned int i = get_global_id(0); i < size1; i += get_global_size(0))
- vec1[i*inc1+start1] = vec2[i*inc2+start2];
-}
-
diff --git a/auxiliary/vector/align1/clear.cl b/auxiliary/vector/align1/clear.cl
deleted file mode 100644
index 1dc93e1..0000000
--- a/auxiliary/vector/align1/clear.cl
+++ /dev/null
@@ -1,11 +0,0 @@
-
-__kernel void clear(
- __global float * vec,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1)
-{
- for (unsigned int i = get_global_id(0); i < size1; i += get_global_size(0))
- vec[i*inc1+start1] = 0;
-}
-
diff --git a/auxiliary/vector/align1/cpu_inplace_mul_add.cl b/auxiliary/vector/align1/cpu_inplace_mul_add.cl
deleted file mode 100644
index fb12b39..0000000
--- a/auxiliary/vector/align1/cpu_inplace_mul_add.cl
+++ /dev/null
@@ -1,16 +0,0 @@
-
-__kernel void cpu_inplace_mul_add(
- __global float * vec1,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global const float * vec2,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2,
- float factor)
-{
- for (unsigned int i = get_global_id(0); i < size1; i += get_global_size(0))
- vec1[i*inc1+start1] += vec2[i*inc2+start2] * factor;
-}
-
diff --git a/auxiliary/vector/align1/cpu_inplace_mult.cl b/auxiliary/vector/align1/cpu_inplace_mult.cl
deleted file mode 100644
index df5eca0..0000000
--- a/auxiliary/vector/align1/cpu_inplace_mult.cl
+++ /dev/null
@@ -1,12 +0,0 @@
-
-__kernel void cpu_inplace_mult(
- __global float * vec,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- float factor)
-{
- for (unsigned int i = get_global_id(0); i < size1; i += get_global_size(0))
- vec[i*inc1+start1] *= factor;
-}
-
diff --git a/auxiliary/vector/align1/cpu_mul_add.cl b/auxiliary/vector/align1/cpu_mul_add.cl
deleted file mode 100644
index 7c02900..0000000
--- a/auxiliary/vector/align1/cpu_mul_add.cl
+++ /dev/null
@@ -1,21 +0,0 @@
-
-__kernel void cpu_mul_add(
- __global const float * vec1,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- float factor,
- __global const float * vec2,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2,
- __global float * result,
- unsigned int start3,
- unsigned int inc3,
- unsigned int size3
- )
-{
- for (unsigned int i = get_global_id(0); i < size1; i += get_global_size(0))
- result[i*inc3+start3] = vec1[i*inc1+start1] * factor + vec2[i*inc2+start2];
-}
-
diff --git a/auxiliary/vector/align1/cpu_mult.cl b/auxiliary/vector/align1/cpu_mult.cl
deleted file mode 100644
index e25e4da..0000000
--- a/auxiliary/vector/align1/cpu_mult.cl
+++ /dev/null
@@ -1,17 +0,0 @@
-
-__kernel void cpu_mult(
- __global const float * vec,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- float factor,
- __global float * result,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2)
-{
- for (unsigned int i = get_global_id(0); i < size1; i += get_global_size(0))
- result[i*inc2+start2] = vec[i*inc1+start1] * factor;
-}
-
-
diff --git a/auxiliary/vector/align1/diag_precond.cl b/auxiliary/vector/align1/diag_precond.cl
deleted file mode 100644
index abaa6f3..0000000
--- a/auxiliary/vector/align1/diag_precond.cl
+++ /dev/null
@@ -1,14 +0,0 @@
-
-__kernel void diag_precond(
- __global const float * diag_A_inv,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global float * x,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2)
-{
- for (unsigned int i = get_global_id(0); i < size1; i += get_global_size(0))
- x[i*inc2+start2] *= diag_A_inv[i*inc1+start1];
-}
diff --git a/auxiliary/vector/align1/divide.cl b/auxiliary/vector/align1/divide.cl
deleted file mode 100644
index f013c53..0000000
--- a/auxiliary/vector/align1/divide.cl
+++ /dev/null
@@ -1,18 +0,0 @@
-
-// Note: name 'div' is not allowed by the jit-compiler
-__kernel void divide(
- __global const float * vec,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global const float * fac, //note: CPU variant is mapped to prod_scalar
- __global float * result,
- unsigned int start3,
- unsigned int inc3,
- unsigned int size3)
-{
- float factor = *fac;
- for (unsigned int i = get_global_id(0); i < size1; i += get_global_size(0))
- result[i*inc1+start3] = vec[i*inc1+start1] / factor;
-}
-
diff --git a/auxiliary/vector/align1/index_norm_inf.cl b/auxiliary/vector/align1/index_norm_inf.cl
deleted file mode 100644
index a3e415b..0000000
--- a/auxiliary/vector/align1/index_norm_inf.cl
+++ /dev/null
@@ -1,58 +0,0 @@
-//index_norm_inf:
-unsigned int float_vector1_index_norm_inf_impl(
- __global const float * vec,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __local float * float_buffer,
- __local unsigned int * index_buffer)
-{
- //step 1: fill buffer:
- float cur_max = 0.0f;
- float tmp;
- for (unsigned int i = get_global_id(0); i < size1; i += get_global_size(0))
- {
- tmp = fabs(vec[i*inc1+start1]);
- if (cur_max < tmp)
- {
- float_buffer[get_global_id(0)] = tmp;
- index_buffer[get_global_id(0)] = i;
- cur_max = tmp;
- }
- }
-
- //step 2: parallel reduction:
- for (unsigned int stride = get_global_size(0)/2; stride > 0; stride /= 2)
- {
- barrier(CLK_LOCAL_MEM_FENCE);
- if (get_global_id(0) < stride)
- {
- //find the first occurring index
- if (float_buffer[get_global_id(0)] < float_buffer[get_global_id(0)+stride])
- {
- index_buffer[get_global_id(0)] = index_buffer[get_global_id(0)+stride];
- float_buffer[get_global_id(0)] = float_buffer[get_global_id(0)+stride];
- }
-
- //index_buffer[get_global_id(0)] = float_buffer[get_global_id(0)] < float_buffer[get_global_id(0)+stride] ? index_buffer[get_global_id(0)+stride] : index_buffer[get_global_id(0)];
- //float_buffer[get_global_id(0)] = max(float_buffer[get_global_id(0)], float_buffer[get_global_id(0)+stride]);
- }
- }
-
- return index_buffer[0];
-}
-
-__kernel void index_norm_inf(
- __global float * vec,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __local float * float_buffer,
- __local unsigned int * index_buffer,
- global unsigned int * result)
-{
- unsigned int tmp = float_vector1_index_norm_inf_impl(vec, start1, inc1, size1, float_buffer, index_buffer);
- if (get_global_id(0) == 0) *result = tmp;
-}
-
-
diff --git a/auxiliary/vector/align1/inner_prod.cl b/auxiliary/vector/align1/inner_prod.cl
deleted file mode 100644
index 37e3714..0000000
--- a/auxiliary/vector/align1/inner_prod.cl
+++ /dev/null
@@ -1,64 +0,0 @@
-
-//helper:
-void helper_inner_prod_parallel_reduction( __local float * tmp_buffer )
-{
- for (unsigned int stride = get_local_size(0)/2; stride > 0; stride /= 2)
- {
- barrier(CLK_LOCAL_MEM_FENCE);
- if (get_local_id(0) < stride)
- tmp_buffer[get_local_id(0)] += tmp_buffer[get_local_id(0)+stride];
- }
-}
-
-//////// inner products:
-float impl_inner_prod(
- __global const float * vec1,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global const float * vec2,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2,
- __local float * tmp_buffer)
-{
- float tmp = 0;
- for (unsigned int i = get_local_id(0); i < size1; i += get_local_size(0))
- tmp += vec1[i*inc1+start1] * vec2[i*inc2+start2];
- tmp_buffer[get_local_id(0)] = tmp;
-
- helper_inner_prod_parallel_reduction(tmp_buffer);
-
- return tmp_buffer[0];
-}
-
-
-__kernel void inner_prod(
- __global const float * vec1,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global const float * vec2,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2,
- __local float * tmp_buffer,
- global float * group_buffer)
-{
- float tmp = impl_inner_prod(vec1,
- ( get_group_id(0) * size1) / get_num_groups(0) * inc1 + start1,
- inc1,
- ((get_group_id(0) + 1) * size1) / get_num_groups(0)
- - ( get_group_id(0) * size1) / get_num_groups(0),
- vec2,
- ( get_group_id(0) * size2) / get_num_groups(0) * inc2 + start2,
- inc2,
- ((get_group_id(0) + 1) * size2) / get_num_groups(0)
- - ( get_group_id(0) * size2) / get_num_groups(0),
- tmp_buffer);
-
- if (get_local_id(0) == 0)
- group_buffer[get_group_id(0)] = tmp;
-
-}
-
diff --git a/auxiliary/vector/align1/inplace_add.cl b/auxiliary/vector/align1/inplace_add.cl
deleted file mode 100644
index d9e55f4..0000000
--- a/auxiliary/vector/align1/inplace_add.cl
+++ /dev/null
@@ -1,15 +0,0 @@
-
-__kernel void inplace_add(
- __global float * vec1,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global const float * vec2,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2)
-{
- for (unsigned int i = get_global_id(0); i < size1; i += get_global_size(0))
- vec1[i*inc1+start1] += vec2[i*inc2+start2];
-}
-
diff --git a/auxiliary/vector/align1/inplace_div_add.cl b/auxiliary/vector/align1/inplace_div_add.cl
deleted file mode 100644
index 6df0494..0000000
--- a/auxiliary/vector/align1/inplace_div_add.cl
+++ /dev/null
@@ -1,17 +0,0 @@
-
-///// divide add:
-__kernel void inplace_div_add(
- __global float * vec1,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global const float * vec2,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2,
- __global const float * fac) //CPU variant is mapped to mult_add
-{
- float factor = *fac;
- for (unsigned int i = get_global_id(0); i < size1; i += get_global_size(0))
- vec1[i*inc1+start1] -= vec2[i*inc2+start2] / factor;
-}
\ No newline at end of file
diff --git a/auxiliary/vector/align1/inplace_div_sub.cl b/auxiliary/vector/align1/inplace_div_sub.cl
deleted file mode 100644
index 1c23d6e..0000000
--- a/auxiliary/vector/align1/inplace_div_sub.cl
+++ /dev/null
@@ -1,18 +0,0 @@
-
-///// divide substract:
-__kernel void inplace_div_sub(
- __global float * vec1,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global const float * vec2,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2,
- __global const float * fac) //CPU variant is mapped to mult_add
-{
- float factor = *fac;
- for (unsigned int i = get_global_id(0); i < size1; i += get_global_size(0))
- vec1[i*inc1+start1] -= vec2[i*inc2+start2] / factor;
-}
-
diff --git a/auxiliary/vector/align1/inplace_divide.cl b/auxiliary/vector/align1/inplace_divide.cl
deleted file mode 100644
index b940b01..0000000
--- a/auxiliary/vector/align1/inplace_divide.cl
+++ /dev/null
@@ -1,13 +0,0 @@
-
-__kernel void inplace_divide(
- __global float * vec,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global const float * fac) //note: CPU variant is mapped to prod_scalar
-{
- float factor = *fac;
- for (unsigned int i = get_global_id(0); i < size1; i += get_global_size(0))
- vec[i*inc1+start1] /= factor;
-}
-
diff --git a/auxiliary/vector/align1/inplace_mul_add.cl b/auxiliary/vector/align1/inplace_mul_add.cl
deleted file mode 100644
index 20c61b8..0000000
--- a/auxiliary/vector/align1/inplace_mul_add.cl
+++ /dev/null
@@ -1,18 +0,0 @@
-
-__kernel void inplace_mul_add(
- __global float * vec1,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global const float * vec2,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2,
- __global const float * fac)
-{
- float factor = *fac;
- for (unsigned int i = get_global_id(0); i < size1; i += get_global_size(0))
- vec1[i*inc1+start1] += vec2[i*inc2+start2] * factor;
-}
-
-
diff --git a/auxiliary/vector/align1/inplace_mul_sub.cl b/auxiliary/vector/align1/inplace_mul_sub.cl
deleted file mode 100644
index 0882e1c..0000000
--- a/auxiliary/vector/align1/inplace_mul_sub.cl
+++ /dev/null
@@ -1,18 +0,0 @@
-
-__kernel void inplace_mul_sub(
- __global float * vec1,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global const float * vec2,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2,
- __global const float * fac) //CPU variant is mapped to mult_add
-{
- float factor = *fac;
- for (unsigned int i = get_global_id(0); i < size1; i += get_global_size(0))
- vec1[i*inc1+start1] -= vec2[i*inc2+start2] * factor;
-}
-
-
diff --git a/auxiliary/vector/align1/inplace_mult.cl b/auxiliary/vector/align1/inplace_mult.cl
deleted file mode 100644
index be10b5b..0000000
--- a/auxiliary/vector/align1/inplace_mult.cl
+++ /dev/null
@@ -1,14 +0,0 @@
-
-__kernel void inplace_mult(
- __global float * vec,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global const float * fac)
-{
- float factor = *fac;
- for (unsigned int i = get_global_id(0); i < size1; i += get_global_size(0))
- vec[i*inc1+start1] *= factor;
-}
-
-
diff --git a/auxiliary/vector/align1/inplace_sub.cl b/auxiliary/vector/align1/inplace_sub.cl
deleted file mode 100644
index 31abe48..0000000
--- a/auxiliary/vector/align1/inplace_sub.cl
+++ /dev/null
@@ -1,15 +0,0 @@
-
-__kernel void inplace_sub(
- __global float * vec1,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global const float * vec2,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2)
-{
- for (unsigned int i = get_global_id(0); i < size1; i += get_global_size(0))
- vec1[i*inc1+start1] -= vec2[i*inc2+start2];
-}
-
diff --git a/auxiliary/vector/align1/mul_add.cl b/auxiliary/vector/align1/mul_add.cl
deleted file mode 100644
index 5084168..0000000
--- a/auxiliary/vector/align1/mul_add.cl
+++ /dev/null
@@ -1,23 +0,0 @@
-
-__kernel void mul_add(
- __global const float * vec1,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global const float * fac,
- __global const float * vec2,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2,
- __global float * result,
- unsigned int start3,
- unsigned int inc3,
- unsigned int size3
- )
-{
- float factor = *fac;
- for (unsigned int i = get_global_id(0); i < size1; i += get_global_size(0))
- result[i*inc3+start3] = vec1[i*inc1+start1] * factor + vec2[i*inc2+start2];
-}
-
-
diff --git a/auxiliary/vector/align1/mul_sub.cl b/auxiliary/vector/align1/mul_sub.cl
deleted file mode 100644
index bc11d3c..0000000
--- a/auxiliary/vector/align1/mul_sub.cl
+++ /dev/null
@@ -1,23 +0,0 @@
-
-///// multiply subtract:
-__kernel void mul_sub(
- __global const float * vec1,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global const float * fac,
- __global const float * vec2,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2,
- __global float * result,
- unsigned int start3,
- unsigned int inc3,
- unsigned int size3
- )
-{
- float factor = *fac;
- for (unsigned int i = get_global_id(0); i < size1; i += get_global_size(0))
- result[i*inc3+start3] = vec1[i*inc1+start1] * factor - vec2[i*inc2+start2];
-}
-
diff --git a/auxiliary/vector/align1/mult.cl b/auxiliary/vector/align1/mult.cl
deleted file mode 100644
index b6b302e..0000000
--- a/auxiliary/vector/align1/mult.cl
+++ /dev/null
@@ -1,17 +0,0 @@
-
-__kernel void mult(
- __global const float * vec,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global const float * fac,
- __global float * result,
- unsigned int start3,
- unsigned int inc3,
- unsigned int size3)
-{
- float factor = *fac;
- for (unsigned int i = get_global_id(0); i < size1; i += get_global_size(0))
- result[i*inc3+start3] = vec[i*inc1+start1] * factor;
-}
-
diff --git a/auxiliary/vector/align1/norm_1.cl b/auxiliary/vector/align1/norm_1.cl
deleted file mode 100644
index 341457d..0000000
--- a/auxiliary/vector/align1/norm_1.cl
+++ /dev/null
@@ -1,49 +0,0 @@
-//helper:
-void helper_norm1_parallel_reduction( __local float * tmp_buffer )
-{
- for (unsigned int stride = get_global_size(0)/2; stride > 0; stride /= 2)
- {
- barrier(CLK_LOCAL_MEM_FENCE);
- if (get_global_id(0) < stride)
- tmp_buffer[get_global_id(0)] += tmp_buffer[get_global_id(0)+stride];
- }
-}
-
-////// norm_1
-float impl_norm_1(
- __global const float * vec,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __local float * tmp_buffer)
-{
- float tmp = 0;
- for (unsigned int i = get_local_id(0); i < size1; i += get_local_size(0))
- tmp += fabs(vec[i*inc1 + start1]);
-
- tmp_buffer[get_local_id(0)] = tmp;
-
- helper_norm1_parallel_reduction(tmp_buffer);
-
- return tmp_buffer[0];
-};
-
-__kernel void norm_1(
- __global const float * vec,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __local float * tmp_buffer,
- global float * group_buffer)
-{
- float tmp = impl_norm_1(vec,
- ( get_group_id(0) * size1) / get_num_groups(0) * inc1 + start1,
- inc1,
- ((get_group_id(0) + 1) * size1) / get_num_groups(0)
- - ( get_group_id(0) * size1) / get_num_groups(0),
- tmp_buffer);
-
- if (get_local_id(0) == 0)
- group_buffer[get_group_id(0)] = tmp;
-}
-
diff --git a/auxiliary/vector/align1/norm_2.cl b/auxiliary/vector/align1/norm_2.cl
deleted file mode 100644
index 1e9b2fc..0000000
--- a/auxiliary/vector/align1/norm_2.cl
+++ /dev/null
@@ -1,52 +0,0 @@
-//helper:
-void helper_norm2_parallel_reduction( __local float * tmp_buffer )
-{
- for (unsigned int stride = get_global_size(0)/2; stride > 0; stride /= 2)
- {
- barrier(CLK_LOCAL_MEM_FENCE);
- if (get_global_id(0) < stride)
- tmp_buffer[get_global_id(0)] += tmp_buffer[get_global_id(0)+stride];
- }
-}
-
-////// norm_2
-float impl_norm_2(
- __global const float * vec,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __local float * tmp_buffer)
-{
- float tmp = 0;
- float vec_entry = 0;
- for (unsigned int i = get_local_id(0); i < size1; i += get_local_size(0))
- {
- vec_entry = vec[i*inc1 + start1];
- tmp += vec_entry * vec_entry;
- }
- tmp_buffer[get_local_id(0)] = tmp;
-
- helper_norm2_parallel_reduction(tmp_buffer);
-
- return tmp_buffer[0];
-};
-
-__kernel void norm_2(
- __global const float * vec,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __local float * tmp_buffer,
- global float * group_buffer)
-{
- float tmp = impl_norm_2(vec,
- ( get_group_id(0) * size1) / get_num_groups(0) + start1,
- inc1,
- ((get_group_id(0) + 1) * size1) / get_num_groups(0)
- - ( get_group_id(0) * size1) / get_num_groups(0),
- tmp_buffer);
-
- if (get_local_id(0) == 0)
- group_buffer[get_group_id(0)] = tmp;
-}
-
diff --git a/auxiliary/vector/align1/norm_inf.cl b/auxiliary/vector/align1/norm_inf.cl
deleted file mode 100644
index 41a5dff..0000000
--- a/auxiliary/vector/align1/norm_inf.cl
+++ /dev/null
@@ -1,43 +0,0 @@
-
-////// norm_inf
-float impl_norm_inf(
- __global const float * vec,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __local float * tmp_buffer)
-{
- float tmp = 0;
- for (unsigned int i = get_local_id(0); i < size1; i += get_local_size(0))
- tmp = fmax(fabs(vec[i*inc1 + start1]), tmp);
- tmp_buffer[get_local_id(0)] = tmp;
-
- //step 2: parallel reduction:
- for (unsigned int stride = get_global_size(0)/2; stride > 0; stride /= 2)
- {
- barrier(CLK_LOCAL_MEM_FENCE);
- if (get_global_id(0) < stride)
- tmp_buffer[get_global_id(0)] = fmax(tmp_buffer[get_global_id(0)], tmp_buffer[get_global_id(0)+stride]);
- }
-
- return tmp_buffer[0];
-}
-
-__kernel void norm_inf(
- __global const float * vec,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __local float * tmp_buffer,
- global float * group_buffer)
-{
- float tmp = impl_norm_inf(vec,
- ( get_group_id(0) * size1) / get_num_groups(0) + start1,
- inc1,
- ((get_group_id(0) + 1) * size1) / get_num_groups(0)
- - ( get_group_id(0) * size1) / get_num_groups(0),
- tmp_buffer);
-
- if (get_local_id(0) == 0)
- group_buffer[get_group_id(0)] = tmp;
-}
diff --git a/auxiliary/vector/align1/plane_rotation.cl b/auxiliary/vector/align1/plane_rotation.cl
deleted file mode 100644
index d6b64d7..0000000
--- a/auxiliary/vector/align1/plane_rotation.cl
+++ /dev/null
@@ -1,28 +0,0 @@
-
-////// plane rotation: (x,y) <- (\alpha x + \beta y, -\beta x + \alpha y)
-__kernel void plane_rotation(
- __global float * vec1,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global float * vec2,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2,
- float alpha,
- float beta)
-{
- float tmp1 = 0;
- float tmp2 = 0;
-
- for (unsigned int i = get_global_id(0); i < size1; i += get_global_size(0))
- {
- tmp1 = vec1[i*inc1+start1];
- tmp2 = vec2[i*inc2+start2];
-
- vec1[i*inc1+start1] = alpha * tmp1 + beta * tmp2;
- vec2[i*inc2+start2] = alpha * tmp2 - beta * tmp1;
- }
-
-}
-
diff --git a/auxiliary/vector/align1/sqrt_sum.cl b/auxiliary/vector/align1/sqrt_sum.cl
deleted file mode 100644
index 396ab24..0000000
--- a/auxiliary/vector/align1/sqrt_sum.cl
+++ /dev/null
@@ -1,22 +0,0 @@
-
-// helper kernel for norm_2
-__kernel void sqrt_sum(
- __global float * vec1,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global float * result)
-{
- //parallel reduction on global memory: (make sure get_global_size(0) is a power of 2)
- for (unsigned int stride = get_global_size(0)/2; stride > 0; stride /= 2)
- {
- if (get_global_id(0) < stride)
- vec1[get_global_id(0)*inc1+start1] += vec1[(get_global_id(0)+stride)*inc1+start1];
- barrier(CLK_GLOBAL_MEM_FENCE);
- }
-
- if (get_global_id(0) == 0)
- *result = sqrt(vec1[start1]);
-
-}
-
diff --git a/auxiliary/vector/align1/sub.cl b/auxiliary/vector/align1/sub.cl
deleted file mode 100644
index 28827ef..0000000
--- a/auxiliary/vector/align1/sub.cl
+++ /dev/null
@@ -1,19 +0,0 @@
-
-__kernel void sub(
- __global const float * vec1,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global const float * vec2,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2,
- __global float * result,
- unsigned int start3,
- unsigned int inc3,
- unsigned int size3)
-{
- for (unsigned int i = get_global_id(0); i < size1; i += get_global_size(0))
- result[i*inc3+start3] = vec1[i*inc1+start1] - vec2[i*inc2+start2];
-}
-
diff --git a/auxiliary/vector/align1/sum.cl b/auxiliary/vector/align1/sum.cl
deleted file mode 100644
index ff0e1c0..0000000
--- a/auxiliary/vector/align1/sum.cl
+++ /dev/null
@@ -1,21 +0,0 @@
-
-
-__kernel void sum(
- __global float * vec1,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global float * result)
-{
- //parallel reduction on global memory (make sure get_global_size(0) is a power of 2)
- for (unsigned int stride = get_global_size(0)/2; stride > 0; stride /= 2)
- {
- if (get_global_id(0) < stride)
- vec1[get_global_id(0)*inc1+start1] += vec1[(get_global_id(0)+stride)*inc1+start1];
- barrier(CLK_GLOBAL_MEM_FENCE);
- }
-
- if (get_global_id(0) == 0)
- *result = vec1[start1];
-}
-
diff --git a/auxiliary/vector/align1/swap.cl b/auxiliary/vector/align1/swap.cl
deleted file mode 100644
index f0f0d81..0000000
--- a/auxiliary/vector/align1/swap.cl
+++ /dev/null
@@ -1,23 +0,0 @@
-
-
-////// swap:
-__kernel void swap(
- __global float * vec1,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global float * vec2,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2
- )
-{
- float tmp;
- for (unsigned int i = get_global_id(0); i < size1; i += get_global_size(0))
- {
- tmp = vec2[i*inc2+start2];
- vec2[i*inc2+start2] = vec1[i*inc1+start1];
- vec1[i*inc1+start1] = tmp;
- }
-}
-
diff --git a/auxiliary/vector/align1/vmax.cl b/auxiliary/vector/align1/vmax.cl
deleted file mode 100644
index 9ce45d6..0000000
--- a/auxiliary/vector/align1/vmax.cl
+++ /dev/null
@@ -1,22 +0,0 @@
-
-
-__kernel void vmax(
- __global float * vec1,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global float * result)
-{
- //parallel reduction on global memory (make sure that size is a power of 2)
- for (unsigned int stride = get_global_size(0)/2; stride > 0; stride /= 2)
- {
- if (get_global_id(0) < stride)
- vec1[get_global_id(0)*inc1+start1] = fmax(vec1[(get_global_id(0)+stride)*inc1+start1],
- vec1[get_global_id(0)*inc1+start1]);
- barrier(CLK_GLOBAL_MEM_FENCE);
- }
-
- if (get_global_id(0) == 0)
- *result = vec1[start1];
-}
-
diff --git a/auxiliary/vector/align16/add.cl b/auxiliary/vector/align16/add.cl
deleted file mode 100644
index 1995acd..0000000
--- a/auxiliary/vector/align16/add.cl
+++ /dev/null
@@ -1,21 +0,0 @@
-
-__kernel void add(
- __global const float16 * vec1,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global const float16 * vec2,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2,
- __global float16 * result,
- unsigned int start3,
- unsigned int inc3,
- unsigned int size3)
-{
- unsigned int i_end = size1/16;
- for (unsigned int i = get_global_id(0); i < i_end; i += get_global_size(0))
- result[i*inc3+start3] = vec1[i*inc1+start1] + vec2[i*inc2+start2];
-}
-
-
diff --git a/auxiliary/vector/align16/cpu_inplace_mul.cl b/auxiliary/vector/align16/cpu_inplace_mul.cl
deleted file mode 100644
index f271f56..0000000
--- a/auxiliary/vector/align16/cpu_inplace_mul.cl
+++ /dev/null
@@ -1,13 +0,0 @@
-
-__kernel void cpu_inplace_mult(
- __global float16 * vec,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- float factor)
-{
- unsigned int i_end = size1/16;
- for (unsigned int i = get_global_id(0); i < i_end; i += get_global_size(0))
- vec[i*inc1+start1] *= factor;
-}
-
diff --git a/auxiliary/vector/align16/cpu_mult.cl b/auxiliary/vector/align16/cpu_mult.cl
deleted file mode 100644
index 5241da5..0000000
--- a/auxiliary/vector/align16/cpu_mult.cl
+++ /dev/null
@@ -1,17 +0,0 @@
-
-__kernel void cpu_mult(
- __global const float16 * vec,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- float factor,
- __global float16 * result,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2)
-{
- unsigned int i_end = size1/16;
- for (unsigned int i = get_global_id(0); i < i_end; i += get_global_size(0))
- result[i*inc2+start2] = vec[i*inc1+start1] * factor;
-}
-
diff --git a/auxiliary/vector/align16/divide.cl b/auxiliary/vector/align16/divide.cl
deleted file mode 100644
index 0fc3ddb..0000000
--- a/auxiliary/vector/align16/divide.cl
+++ /dev/null
@@ -1,20 +0,0 @@
-
-//Note: 'div' cannot be used because of complaints by the jit-compiler
-__kernel void divide(
- __global const float16 * vec,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global const float * fac, //note: CPU variant is mapped to prod_scalar
- __global float16 * result,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2)
-{
- float factor = *fac;
- unsigned int i_end = size1/16;
- for (unsigned int i = get_global_id(0); i < i_end; i += get_global_size(0))
- result[i*inc2+start2] = vec[i*inc1+start1] / factor;
-}
-
-
diff --git a/auxiliary/vector/align16/inplace_add.cl b/auxiliary/vector/align16/inplace_add.cl
deleted file mode 100644
index a93dad2..0000000
--- a/auxiliary/vector/align16/inplace_add.cl
+++ /dev/null
@@ -1,16 +0,0 @@
-
-__kernel void inplace_add(
- __global float16 * vec1,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global const float16 * vec2,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2)
-{
- unsigned int i_end = size1/16;
- for (unsigned int i = get_global_id(0); i < i_end; i += get_global_size(0))
- vec1[i*inc1+start1] += vec2[i*inc2+start2];
-}
-
diff --git a/auxiliary/vector/align16/inplace_divide.cl b/auxiliary/vector/align16/inplace_divide.cl
deleted file mode 100644
index 76741c1..0000000
--- a/auxiliary/vector/align16/inplace_divide.cl
+++ /dev/null
@@ -1,15 +0,0 @@
-
-
-__kernel void inplace_divide(
- __global float16 * vec,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global const float * fac) //note: CPU variant is mapped to prod_scalar
-{
- float factor = *fac;
- unsigned int i_end = size1/16;
- for (unsigned int i = get_global_id(0); i < i_end; i += get_global_size(0))
- vec[i*inc1+start1] /= factor;
-}
-
diff --git a/auxiliary/vector/align16/inplace_mult.cl b/auxiliary/vector/align16/inplace_mult.cl
deleted file mode 100644
index 6e5edfe..0000000
--- a/auxiliary/vector/align16/inplace_mult.cl
+++ /dev/null
@@ -1,14 +0,0 @@
-
-__kernel void inplace_mult(
- __global float16 * vec,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global const float * fac)
-{
- float factor = *fac;
- unsigned int i_end = size1/16;
- for (unsigned int i = get_global_id(0); i < i_end; i += get_global_size(0))
- vec[i*inc1+start1] *= factor;
-}
-
diff --git a/auxiliary/vector/align16/inplace_sub.cl b/auxiliary/vector/align16/inplace_sub.cl
deleted file mode 100644
index e452e3f..0000000
--- a/auxiliary/vector/align16/inplace_sub.cl
+++ /dev/null
@@ -1,17 +0,0 @@
-
-__kernel void inplace_sub(
- __global float16 * vec1,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global const float16 * vec2,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2)
-{
- unsigned int i_end = size1/16;
- for (unsigned int i = get_global_id(0); i < i_end; i += get_global_size(0))
- vec1[i*inc1+start1] -= vec2[i*inc2+start2];
-}
-
-
diff --git a/auxiliary/vector/align16/mult.cl b/auxiliary/vector/align16/mult.cl
deleted file mode 100644
index d55e667..0000000
--- a/auxiliary/vector/align16/mult.cl
+++ /dev/null
@@ -1,18 +0,0 @@
-
-__kernel void mult(
- __global const float16 * vec,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global const float * fac,
- __global float16 * result,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2)
-{
- float factor = *fac;
- unsigned int i_end = size1/16;
- for (unsigned int i = get_global_id(0); i < i_end; i += get_global_size(0))
- result[i*inc2+start2] = vec[i*inc1+start1] * factor;
-}
-
diff --git a/auxiliary/vector/align16/sub.cl b/auxiliary/vector/align16/sub.cl
deleted file mode 100644
index 93e8077..0000000
--- a/auxiliary/vector/align16/sub.cl
+++ /dev/null
@@ -1,21 +0,0 @@
-
-__kernel void sub(
- __global const float16 * vec1,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global const float16 * vec2,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2,
- __global float16 * result,
- unsigned int start3,
- unsigned int inc3,
- unsigned int size3)
-{
- unsigned int i_end = size1 / 16;
- for (unsigned int i = get_global_id(0); i < i_end; i += get_global_size(0))
- result[i*inc3+start3] = vec1[i*inc1+start1] - vec2[i*inc2+start2];
-}
-
-
diff --git a/auxiliary/vector/align4/cpu_inplace_mul_add.cl b/auxiliary/vector/align4/cpu_inplace_mul_add.cl
deleted file mode 100644
index c71f129..0000000
--- a/auxiliary/vector/align4/cpu_inplace_mul_add.cl
+++ /dev/null
@@ -1,17 +0,0 @@
-
-__kernel void cpu_inplace_mul_add(
- __global float4 * vec1,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global const float4 * vec2,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2,
- float factor)
-{
- unsigned int i_end = size1/4;
- for (unsigned int i = get_global_id(0); i < i_end; i += get_global_size(0))
- vec1[i*inc1+start1] += vec2[i*inc2+start2] * factor;
-}
-
diff --git a/auxiliary/vector/align4/cpu_mul_add.cl b/auxiliary/vector/align4/cpu_mul_add.cl
deleted file mode 100644
index 53a4a9f..0000000
--- a/auxiliary/vector/align4/cpu_mul_add.cl
+++ /dev/null
@@ -1,21 +0,0 @@
-
-__kernel void cpu_mul_add(
- __global const float4 * vec1,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- float factor,
- __global const float4 * vec2,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2,
- __global float4 * result,
- unsigned int start3,
- unsigned int inc3,
- unsigned int size3)
-{
- unsigned int i_end = size1/4;
- for (unsigned int i = get_global_id(0); i < i_end; i += get_global_size(0))
- result[i*inc3+start3] = vec1[i*inc1+start1] * factor + vec2[i*inc2+start2];
-}
-
diff --git a/auxiliary/vector/align4/inplace_div_add.cl b/auxiliary/vector/align4/inplace_div_add.cl
deleted file mode 100644
index 4cbf33d..0000000
--- a/auxiliary/vector/align4/inplace_div_add.cl
+++ /dev/null
@@ -1,20 +0,0 @@
-
-__kernel void inplace_div_add(
- __global float4 * vec1,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global const float4 * vec2,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2,
- __global const float * fac) //CPU variant is mapped to mult_add
-{
- float factor = *fac;
- unsigned int i_end = size1 / 4;
- for (unsigned int i = get_global_id(0); i < i_end; i += get_global_size(0))
- vec1[i*inc1+start1] -= vec2[i*inc2+start2] / factor;
-}
-
-
-
diff --git a/auxiliary/vector/align4/inplace_div_sub.cl b/auxiliary/vector/align4/inplace_div_sub.cl
deleted file mode 100644
index 3f73162..0000000
--- a/auxiliary/vector/align4/inplace_div_sub.cl
+++ /dev/null
@@ -1,20 +0,0 @@
-
-
-__kernel void inplace_div_sub(
- __global float4 * vec1,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global const float4 * vec2,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2,
- __global const float * fac) //CPU variant is mapped to mult_add
-{
- float factor = *fac;
- unsigned int i_end = size1/4;
- for (unsigned int i = get_global_id(0); i < i_end; i += get_global_size(0))
- vec1[i*inc1+start1] -= vec2[i*inc2+start2] / factor;
-}
-
-
diff --git a/auxiliary/vector/align4/inplace_mul_add.cl b/auxiliary/vector/align4/inplace_mul_add.cl
deleted file mode 100644
index 96618c9..0000000
--- a/auxiliary/vector/align4/inplace_mul_add.cl
+++ /dev/null
@@ -1,18 +0,0 @@
-
-__kernel void inplace_mul_add(
- __global float4 * vec1,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global const float4 * vec2,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2,
- __global const float * fac)
-{
- float factor = *fac;
- unsigned int size_div_4 = size1/4;
- for (unsigned int i = get_global_id(0); i < size_div_4; i += get_global_size(0))
- vec1[i*inc1+start1] += vec2[i*inc2+start2] * factor;
-}
-
diff --git a/auxiliary/vector/align4/inplace_mul_sub.cl b/auxiliary/vector/align4/inplace_mul_sub.cl
deleted file mode 100644
index 4c8c3bc..0000000
--- a/auxiliary/vector/align4/inplace_mul_sub.cl
+++ /dev/null
@@ -1,19 +0,0 @@
-
-__kernel void inplace_mul_sub(
- __global float4 * vec1,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global const float4 * vec2,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2,
- __global const float * fac) //CPU variant is mapped to mult_add
-{
- float factor = *fac;
- unsigned int i_end = size1/4;
- for (unsigned int i = get_global_id(0); i < i_end; i += get_global_size(0))
- vec1[i*inc1+start1] -= vec2[i*inc2+start2] * factor;
-}
-
-
diff --git a/auxiliary/vector/align4/mul_add.cl b/auxiliary/vector/align4/mul_add.cl
deleted file mode 100644
index 0b074ae..0000000
--- a/auxiliary/vector/align4/mul_add.cl
+++ /dev/null
@@ -1,22 +0,0 @@
-
-__kernel void mul_add(
- __global const float4 * vec1,
- unsigned int start1,
- unsigned int inc1,
- unsigned int size1,
- __global const float * fac,
- __global const float4 * vec2,
- unsigned int start2,
- unsigned int inc2,
- unsigned int size2,
- __global float4 * result,
- unsigned int start3,
- unsigned int inc3,
- unsigned int size3)
-{
- float factor = *fac;
- unsigned int i_end = size1/4;
- for (unsigned int i = get_global_id(0); i < i_end; i += get_global_size(0))
- result[i*inc3+start3] = vec1[i*inc1+start1] * factor + vec2[i*inc2+start2];
-}
-
diff --git a/examples/tutorial/iterative-ublas.cpp~ b/examples/tutorial/iterative-ublas.cpp~
deleted file mode 100644
index a734afc..0000000
--- a/examples/tutorial/iterative-ublas.cpp~
+++ /dev/null
@@ -1,163 +0,0 @@
-/* =========================================================================
- Copyright (c) 2010-2012, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp at iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-//
-// include necessary system headers
-//
-#include <iostream>
-
-//
-// Necessary to obtain a suitable performance in ublas
-#ifndef NDEBUG
- #define NDEBUG
-#endif
-
-
-//
-// ublas includes
-//
-#include <boost/numeric/ublas/io.hpp>
-#include <boost/numeric/ublas/triangular.hpp>
-#include <boost/numeric/ublas/matrix_sparse.hpp>
-#include <boost/numeric/ublas/matrix.hpp>
-#include <boost/numeric/ublas/matrix_proxy.hpp>
-#include <boost/numeric/ublas/operation.hpp>
-#include <boost/numeric/ublas/operation_sparse.hpp>
-#include <boost/numeric/ublas/io.hpp>
-#include <boost/numeric/ublas/lu.hpp>
-
-// Must be set if you want to use ViennaCL algorithms on ublas objects
-#define VIENNACL_HAVE_UBLAS 1
-
-//
-// ViennaCL includes
-//
-#include "viennacl/linalg/ilu.hpp"
-#include "viennacl/linalg/cg.hpp"
-#include "viennacl/linalg/bicgstab.hpp"
-#include "viennacl/linalg/gmres.hpp"
-#include "viennacl/io/matrix_market.hpp"
-
-// Some helper functions for this tutorial:
-#include "Random.hpp"
-#include "vector-io.hpp"
-
-/*
-*
-* Tutorial: Iterative solvers without OpenCL
-*
-*/
-using namespace boost::numeric;
-
-
-int main()
-{
- typedef float ScalarType;
-
- //
- // Set up some ublas objects
- //
- ublas::vector<ScalarType> rhs;
- ublas::vector<ScalarType> rhs2;
- ublas::vector<ScalarType> ref_result;
- ublas::vector<ScalarType> result;
- ublas::compressed_matrix<ScalarType> ublas_matrix;
-
- //
- // Read system from file
- //
- #ifdef _MSC_VER
- if (!viennacl::io::read_matrix_market_file(ublas_matrix, "../../examples/testdata/mat65k.mtx"))
- #else
- if (!viennacl::io::read_matrix_market_file(ublas_matrix, "../examples/testdata/mat65k.mtx"))
- #endif
- {
- std::cout << "Error reading Matrix file" << std::endl;
- return 0;
- }
- //std::cout << "done reading matrix" << std::endl;
-
- #ifdef _MSC_VER
- if (!readVectorFromFile("../../examples/testdata/rhs65025.txt", rhs))
- #else
- if (!readVectorFromFile("../examples/testdata/rhs65025.txt", rhs))
- #endif
- {
- std::cout << "Error reading RHS file" << std::endl;
- return 0;
- }
- //std::cout << "done reading rhs" << std::endl;
-
- #ifdef _MSC_VER
- if (!readVectorFromFile("../../examples/testdata/result65025.txt", ref_result))
- #else
- if (!readVectorFromFile("../examples/testdata/result65025.txt", ref_result))
- #endif
- {
- std::cout << "Error reading Result file" << std::endl;
- return 0;
- }
- //std::cout << "done reading result" << std::endl;
-
-
- //
- // set up ILUT preconditioners for ViennaCL and ublas objects. Other preconditioners can also be used (see manual)
- //
- viennacl::linalg::ilut_precond< ublas::compressed_matrix<ScalarType> > ublas_ilut(ublas_matrix, viennacl::linalg::ilut_tag());
- viennacl::linalg::ilu0_precond< ublas::compressed_matrix<ScalarType> > ublas_ilu0(ublas_matrix, viennacl::linalg::ilu0_tag());
- viennacl::linalg::block_ilu_precond< ublas::compressed_matrix<ScalarType>,
- viennacl::linalg::ilu0_tag> ublas_block_ilu0(ublas_matrix, viennacl::linalg::ilu0_tag());
-
- //
- // Conjugate gradient solver:
- //
- std::cout << "----- CG Test -----" << std::endl;
-
- result = viennacl::linalg::solve(ublas_matrix, rhs, viennacl::linalg::cg_tag());
- result = viennacl::linalg::solve(ublas_matrix, rhs, viennacl::linalg::cg_tag(1e-6, 20), ublas_ilut);
- result = viennacl::linalg::solve(ublas_matrix, rhs, viennacl::linalg::cg_tag(1e-6, 20), ublas_ilu0);
- result = viennacl::linalg::solve(ublas_matrix, rhs, viennacl::linalg::cg_tag(1e-6, 20), ublas_block_ilu0);
-
-
- //
- // Stabilized BiConjugate gradient solver:
- //
- std::cout << "----- BiCGStab Test -----" << std::endl;
-
- result = viennacl::linalg::solve(ublas_matrix, rhs, viennacl::linalg::bicgstab_tag()); //without preconditioner
- result = viennacl::linalg::solve(ublas_matrix, rhs, viennacl::linalg::bicgstab_tag(1e-6, 20), ublas_ilut); //with preconditioner
- result = viennacl::linalg::solve(ublas_matrix, rhs, viennacl::linalg::bicgstab_tag(1e-6, 20), ublas_ilu0); //with preconditioner
-
- //
- // GMRES solver:
- //
- std::cout << "----- GMRES Test -----" << std::endl;
-
- //
- // for ublas objects:
- //
- result = viennacl::linalg::solve(ublas_matrix, rhs, viennacl::linalg::gmres_tag()); //without preconditioner
- result = viennacl::linalg::solve(ublas_matrix, rhs, viennacl::linalg::gmres_tag(1e-6, 20), ublas_ilut);//with preconditioner
- result = viennacl::linalg::solve(ublas_matrix, rhs, viennacl::linalg::gmres_tag(1e-6, 20), ublas_ilu0);//with preconditioner
-
- //
- // That's it.
- //
- std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl;
-
- return 0;
-}
-
diff --git a/tests/src/generator_inner_product.cpp b/tests/src/generator_inner_product.cpp
deleted file mode 100644
index ecca138..0000000
--- a/tests/src/generator_inner_product.cpp
+++ /dev/null
@@ -1,172 +0,0 @@
-//
-// *** System
-//
-#include <iostream>
-
-//
-// *** Boost
-//
-#include <boost/numeric/ublas/io.hpp>
-#include <boost/numeric/ublas/vector.hpp>
-
-
-//
-// *** ViennaCL
-//
-// #define VIENNACL_DEBUG_ALL
-// #define VIENNACL_DEBUG_BUILD
-// #define VIENNACL_HAVE_UBLAS 1
-// #define VIENNACL_DEBUG_CUSTOM_OPERATION
-#include "viennacl/vector.hpp"
-#include "viennacl/linalg/inner_prod.hpp"
-#include "viennacl/linalg/norm_1.hpp"
-#include "viennacl/linalg/norm_2.hpp"
-#include "viennacl/linalg/norm_inf.hpp"
-#include "viennacl/generator/custom_operation.hpp"
-
-using namespace boost::numeric;
-
-template <class TYPE>
-bool readVectorFromFile ( const std::string & filename, boost::numeric::ublas::vector<TYPE> & vec ) {
- std::ifstream file ( filename.c_str() );
-
- if ( !file ) return false;
-
- unsigned int size;
- file >> size;
-
- if ( size > 20000 ) //keep execution times short
- size = 20000;
- vec.resize ( size );
- for ( unsigned int i = 0; i < size; ++i ) {
- TYPE element;
- file >> element;
- vec[i] = element;
- }
-
- return true;
-}
-
-template <typename ScalarType>
-ScalarType diff ( ScalarType & s1, viennacl::scalar<ScalarType> & s2 ) {
- if ( s1 != s2 )
- return ( s1 - s2 ) / std::max ( fabs ( s1 ), fabs ( s2 ) );
- return 0;
-}
-
-template< typename NumericT,unsigned int Alignment, typename Epsilon >
-int test ( Epsilon const& epsilon, std::string vecfile, std::string resultfile ) {
- int retval = EXIT_SUCCESS;
-
- viennacl::scalar<NumericT> vcl_res ( 0 );
- ublas::vector<NumericT> vec;
- ublas::vector<NumericT> vec2;
-
- NumericT res;
-
- viennacl::generator::gpu_symbolic_scalar<0,NumericT> symres;
- viennacl::generator::symbolic_vector<1,NumericT,Alignment> symv;
- viennacl::generator::symbolic_vector<2,NumericT,Alignment> symv2;
- viennacl::generator::cpu_symbolic_scalar<3,NumericT> symscal;
- viennacl::generator::cpu_symbolic_scalar<2,NumericT> symscal2;
-
-
- if ( !readVectorFromFile<NumericT> ( vecfile, vec ) ) {
- std::cout << "Error reading vec file" << std::endl;
- retval = EXIT_FAILURE;
- }
-//
- std::cout << "Running tests for vector of size " << vec.size() << std::endl;
- std::cout << "----- Alignment " << Alignment << " -----" << std::endl;
-//
- viennacl::vector<NumericT,Alignment> vcl_vec ( vec.size() );
- viennacl::vector<NumericT,Alignment> vcl_vec2 ( vec.size() );
-//
- vec2 = vec;
- viennacl::copy ( vec.begin(), vec.end(), vcl_vec.begin() );
- viennacl::copy ( vec2.begin(), vec2.end(), vcl_vec2.begin() );
-
-// --------------------------------------------------------------------------
-
- std::cout << "testing inner product..." << std::endl;
-
- res = ublas::inner_prod ( vec, vec2 );
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symres = inner_prod ( symv, symv2 ) ) ( vcl_res, vcl_vec, vcl_vec2 ) );
- if ( fabs ( diff ( res, vcl_res ) ) > epsilon ) {
- std::cout << "# Error at operation: inner product" << std::endl;
- std::cout << " Diff " << fabs ( diff ( res, vcl_res ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "testing inner product division..." << std::endl;
- res = ublas::inner_prod ( vec, vec2 ) /ublas::inner_prod ( vec, vec );
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symres = inner_prod ( symv, symv2 ) /inner_prod ( symv,symv ) ) ( vcl_res, vcl_vec, vcl_vec2 ) );
- if ( fabs ( diff ( res, vcl_res ) ) > epsilon ) {
- std::cout << "# Error at operation: inner product" << std::endl;
- std::cout << " diff: " << fabs ( diff ( res, vcl_res ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "testing scalar over inner product..." << std::endl;
- res = 4/ublas::inner_prod ( vec, vec );
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symres = symscal2/inner_prod ( symv,symv ) ) ( vcl_res, vcl_vec, 4.0f ) );
- if ( fabs ( diff ( res, vcl_res ) ) > epsilon ) {
- std::cout << "# Error at operation: scalar over inner product" << std::endl;
- std::cout << " diff: " << fabs ( diff ( res, vcl_res ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "testing inner_prod minus ( scal minus inner_prod ) " << std::endl;
- res = ublas::inner_prod ( vec, vec2 ) - ( 5.0f - inner_prod ( vec,vec2 ) );
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symres = inner_prod ( symv, symv2 ) - ( symscal - inner_prod ( symv,symv2 ) ) ) ( vcl_res, vcl_vec, vcl_vec2, 5.0f ) );
- if ( fabs ( diff ( res, vcl_res ) ) > epsilon ) {
- std::cout << "# Error at operation: inner_prod minus ( scal minus inner_prod ) " << std::endl;
- std::cout << " diff: " << fabs ( diff ( res, vcl_res ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "testing nested inner product" << std::endl;
- res = ublas::inner_prod ( vec, ublas::inner_prod ( vec,vec2 ) * vec2 );
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symres = inner_prod ( symv, inner_prod ( symv,symv2 ) * symv2 ) ) ( vcl_res, vcl_vec, vcl_vec2 ) );
- if ( fabs ( diff ( res, vcl_res ) ) > epsilon ) {
- std::cout << "# Error at operation: nested inner product" << std::endl;
- std::cout << " diff: " << fabs ( diff ( res, vcl_res ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- return retval;
-}
-
-
-int main() {
- std::cout << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << "## Test :: Inner Product" << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << std::endl;
-
- int retval = EXIT_SUCCESS;
-
- std::string vecfile ( "../examples/testdata/rhs65025.txt" );
- std::string resultfile ( "../examples/testdata/result65025.txt" );
-
- std::cout << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << std::endl;
- {
- typedef float NumericT;
- NumericT epsilon = 1.0E-4;
- std::cout << "# Testing setup:" << std::endl;
- std::cout << " eps: " << epsilon << std::endl;
- std::cout << " numeric: float" << std::endl;
- retval = test<NumericT,1> ( epsilon, vecfile, resultfile );
-// retval = test<NumericT,4> ( epsilon, vecfile, resultfile );
- retval = test<NumericT,16> ( epsilon, vecfile, resultfile );
- if ( retval == EXIT_SUCCESS )
- std::cout << "# Test passed" << std::endl;
- else
- return retval;
- }
-}
diff --git a/tests/src/generator_matrix.cpp b/tests/src/generator_matrix.cpp
deleted file mode 100644
index 751255f..0000000
--- a/tests/src/generator_matrix.cpp
+++ /dev/null
@@ -1,219 +0,0 @@
-//
-// *** System
-//
-#include <iostream>
-
-//
-// *** Boost
-//
-#include <boost/numeric/ublas/io.hpp>
-#include <boost/numeric/ublas/vector.hpp>
-
-//
-// *** ViennaCL
-//
-//#define VIENNACL_DEBUG_ALL
-#define VIENNACL_HAVE_UBLAS 1
-#include "viennacl/scalar.hpp"
-#include "viennacl/matrix.hpp"
-#include "viennacl/vector.hpp"
-#include "viennacl/linalg/prod.hpp"
-#include "viennacl/linalg/norm_2.hpp"
-#include "viennacl/linalg/direct_solve.hpp"
-#include "examples/tutorial/Random.hpp"
-#include "examples/benchmarks/benchmark-utils.hpp"
-#include "viennacl/generator/custom_operation.hpp"
-
-using namespace boost::numeric;
-
-const int matrix_size = 100;
-
-template <typename ScalarType, typename F, unsigned int ALIGNMENT>
-ScalarType diff ( ublas::matrix<ScalarType> & mat1, viennacl::matrix<ScalarType, F, ALIGNMENT> & mat2 ) {
- ublas::matrix<ScalarType> mat2_cpu ( mat2.size1(), mat2.size2() );
- copy ( mat2, mat2_cpu );
- ScalarType ret = 0;
- ScalarType act = 0;
-
- for ( unsigned int i = 0; i < mat2_cpu.size1(); ++i ) {
- for ( unsigned int j = 0; j < mat2_cpu.size2(); ++j ) {
- act = fabs ( mat2_cpu ( i,j ) - mat1 ( i,j ) ) / std::max ( fabs ( mat2_cpu ( i, j ) ), fabs ( mat1 ( i,j ) ) );
- if ( act > ret )
- ret = act;
- }
- }
- //std::cout << ret << std::endl;
- return ret;
-}
-
-template< typename NumericT, typename Epsilon >
-int test ( Epsilon const& epsilon ) {
-
- int retval = EXIT_SUCCESS;
-
- ublas::matrix<NumericT> mat ( matrix_size, matrix_size );
-
- NumericT cpu_scal = static_cast<NumericT> ( 42.1415 );
- viennacl::scalar<NumericT> gpu_scal = static_cast<NumericT> ( 42.1415 );
-
- viennacl::matrix<NumericT> vcl_mat ( matrix_size, matrix_size );
- viennacl::matrix<NumericT> vcl_mat2 ( matrix_size, matrix_size );
-
- viennacl::generator::symbolic_matrix<0,NumericT> symm;
- viennacl::generator::symbolic_matrix<1,NumericT> symm2;
-
- viennacl::generator::cpu_symbolic_scalar<1,NumericT> cpu_sym_scal2;
-
- viennacl::generator::gpu_symbolic_scalar<1,NumericT> gpu_sym_scal2;
-
- for ( unsigned int i = 0; i < mat.size1(); ++i )
- for ( unsigned int j = 0; j < mat.size2(); ++j )
- mat ( i,j ) = static_cast<NumericT> ( 0.1 ) * random<NumericT>();
-
- ublas::matrix<NumericT> mat2 ( mat ) ;
-
- viennacl::copy ( mat, vcl_mat );
- viennacl::copy ( mat2, vcl_mat2 );
-
- std::cout << "Testing addition..." << std::endl;
- mat = mat + mat2;
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symm = symm + symm2 ) ( vcl_mat, vcl_mat2 ) );
- if ( fabs ( diff ( mat, vcl_mat ) ) > epsilon ) {
- std::cout << "# Error at operation: addition" << std::endl;
- std::cout << " diff: " << fabs ( diff ( mat, vcl_mat ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "Testing inplace addition..." << std::endl;
- mat += mat2;
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symm += symm2 ) ( vcl_mat, vcl_mat2 ) );
- if ( fabs ( diff ( mat, vcl_mat ) ) > epsilon ) {
- std::cout << "# Error at operation: inplace addition" << std::endl;
- std::cout << " diff: " << fabs ( diff ( mat, vcl_mat ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "testing substraction..." << std::endl;
- mat = mat - mat2;
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symm = symm - symm2 ) ( vcl_mat, vcl_mat2 ) );
- if ( fabs ( diff ( mat, vcl_mat ) ) > epsilon ) {
- std::cout << "# Error at operation: substraction" << std::endl;
- std::cout << " diff: " << fabs ( diff ( mat, vcl_mat ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "Testing inplace substraction..." << std::endl;
- mat -= mat2;
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symm -= symm2 ) ( vcl_mat, vcl_mat2 ) );
- if ( fabs ( diff ( mat, vcl_mat ) ) > epsilon ) {
- std::cout << "# Error at operation: inplace addition" << std::endl;
- std::cout << " diff: " << fabs ( diff ( mat, vcl_mat ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- // --------------------------------------------------------------------------
-
- std::cout << "testing cpu scalar multiplication ..." << std::endl;
- mat = mat*cpu_scal;
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symm = symm*cpu_sym_scal2 ) ( vcl_mat, cpu_scal ) );
- if ( fabs ( diff ( mat, vcl_mat ) ) > epsilon ) {
- std::cout << "# Error at operation: cpu scalar multiplication " << std::endl;
- std::cout << " diff: " << fabs ( diff ( mat, vcl_mat ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "testing inplace cpu scalar multiplication ..." << std::endl;
- mat *= cpu_scal;
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symm *= cpu_sym_scal2 ) ( vcl_mat, cpu_scal ) );
- if ( fabs ( diff ( mat, vcl_mat ) ) > epsilon ) {
- std::cout << "# Error at operation: inplace cpu scalar multiplication " << std::endl;
- std::cout << " diff: " << fabs ( diff ( mat, vcl_mat ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "testing cpu scalar division ..." << std::endl;
- mat = mat/cpu_scal;
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symm = symm/cpu_sym_scal2 ) ( vcl_mat, cpu_scal ) );
- if ( fabs ( diff ( mat, vcl_mat ) ) > epsilon ) {
- std::cout << "# Error at operation: cpu scalar division " << std::endl;
- std::cout << " diff: " << fabs ( diff ( mat, vcl_mat ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "testing inplace cpu scalar division ..." << std::endl;
- mat /= cpu_scal;
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symm /= cpu_sym_scal2 ) ( vcl_mat, cpu_scal ) );
- if ( fabs ( diff ( mat, vcl_mat ) ) > epsilon ) {
- std::cout << "# Error at operation: inplace cpu scalar division " << std::endl;
- std::cout << " diff: " << fabs ( diff ( mat, vcl_mat ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "testing gpu scalar multiplication ..." << std::endl;
- mat = mat*cpu_scal;
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symm = symm*gpu_sym_scal2 ) ( vcl_mat, gpu_scal ) );
- if ( fabs ( diff ( mat, vcl_mat ) ) > epsilon ) {
- std::cout << "# Error at operation: gpu scalar multiplication " << std::endl;
- std::cout << " diff: " << fabs ( diff ( mat, vcl_mat ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "testing inplace gpu scalar multiplication ..." << std::endl;
- mat *= cpu_scal;
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symm *= gpu_sym_scal2 ) ( vcl_mat, gpu_scal ) );
- if ( fabs ( diff ( mat, vcl_mat ) ) > epsilon ) {
- std::cout << "# Error at operation: inplace gpu scalar multiplication " << std::endl;
- std::cout << " diff: " << fabs ( diff ( mat, vcl_mat ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "testing gpu scalar division ..." << std::endl;
- mat = mat/cpu_scal;
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symm = symm/gpu_sym_scal2 ) ( vcl_mat, gpu_scal ) );
- if ( fabs ( diff ( mat, vcl_mat ) ) > epsilon ) {
- std::cout << "# Error at operation: gpu scalar division " << std::endl;
- std::cout << " diff: " << fabs ( diff ( mat, vcl_mat ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "testing inplace gpu scalar division ..." << std::endl;
- mat /= cpu_scal;
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symm /= gpu_sym_scal2 ) ( vcl_mat, gpu_scal ) );
- if ( fabs ( diff ( mat, vcl_mat ) ) > epsilon ) {
- std::cout << "# Error at operation: inplace gpu scalar division " << std::endl;
- std::cout << " diff: " << fabs ( diff ( mat, vcl_mat ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- return retval;
-
-}
-
-int main() {
- std::cout << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << "## Test :: Matrix" << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << std::endl;
-
- int retval = EXIT_SUCCESS;
-
- std::cout << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << std::endl;
- {
- typedef float NumericT;
- NumericT epsilon = 1.0E-3;
- std::cout << "# Testing setup:" << std::endl;
- std::cout << " eps: " << epsilon << std::endl;
- std::cout << " numeric: float" << std::endl;
- std::cout << " layout: row-major" << std::endl;
- retval = test<NumericT> ( epsilon );
- if ( retval == EXIT_SUCCESS )
- std::cout << "# Test passed" << std::endl;
- else
- return retval;
- }
-}
diff --git a/tests/src/generator_matrix_vector_product.cpp b/tests/src/generator_matrix_vector_product.cpp
deleted file mode 100644
index bd59f25..0000000
--- a/tests/src/generator_matrix_vector_product.cpp
+++ /dev/null
@@ -1,234 +0,0 @@
-// #define VIENNACL_DEBUG_CUSTOM_OPERATION
-
-//
-
-
-// *** Boost
-//
-#include <boost/numeric/ublas/io.hpp>
-#include <boost/numeric/ublas/triangular.hpp>
-#include <boost/numeric/ublas/matrix_sparse.hpp>
-#include <boost/numeric/ublas/matrix.hpp>
-
-
-
-//
-// *** ViennaCL
-//
-// #define VIENNACL_DEBUG_ALL
-// #define VIENNACL_DEBUG_BUILD
-// #define VIENNACL_HAVE_UBLAS 1
-// #define VIENNACL_DEBUG_CUSTOM_OPERATION
-
-#include "viennacl/scalar.hpp"
-#include "viennacl/matrix.hpp"
-#include "viennacl/vector.hpp"
-#include "viennacl/linalg/prod.hpp"
-#include "viennacl/linalg/norm_2.hpp"
-#include "viennacl/linalg/direct_solve.hpp"
-#include "examples/tutorial/Random.hpp"
-#include "viennacl/generator/custom_operation.hpp"
-
-//
-// -------------------------------------------------------------
-//
-using namespace boost::numeric;
-//
-// -------------------------------------------------------------
-//
-
-template <typename ScalarType, unsigned int Alignment>
-ScalarType diff ( ublas::vector<ScalarType> & v1, viennacl::vector<ScalarType,Alignment> & v2 ) {
- ublas::vector<ScalarType> v2_cpu ( v2.size() );
- viennacl::copy( v2.begin(), v2.end(), v2_cpu.begin() );
- for ( unsigned int i=0; i<v1.size(); ++i ) {
- if ( std::max ( fabs ( v2_cpu[i] ), fabs ( v1[i] ) ) > 0 )
- v2_cpu[i] = fabs ( v2_cpu[i] - v1[i] ) / std::max ( fabs ( v2_cpu[i] ), fabs ( v1[i] ) );
- else
- v2_cpu[i] = 0.0;
- }
- return norm_inf ( v2_cpu );
-}
-
-template< typename NumericT, typename F,typename F2, unsigned int Alignment, typename Epsilon >
-int test ( Epsilon const& epsilon ) {
- int retval = EXIT_SUCCESS;
- static const unsigned int SIZE = 100;
- // --------------------------------------------------------------------------
- ublas::vector<NumericT> rhs ( SIZE );
- for ( unsigned int i = 0; i < rhs.size(); ++i )
- rhs ( i ) = random<NumericT>();
- ublas::vector<NumericT> rhs2 = rhs;
- ublas::vector<NumericT> result = ublas::scalar_vector<NumericT> ( SIZE, 1 );
- ublas::vector<NumericT> result2 = result;
- ublas::vector<NumericT> rhs_trans = rhs;
- rhs_trans.resize ( result.size(), true );
- ublas::vector<NumericT> result_trans = ublas::zero_vector<NumericT> ( rhs.size() );
-
-
-
- ublas::matrix<NumericT,F2> matrix ( result.size(), rhs.size() );
- for ( unsigned int i = 0; i < matrix.size1(); ++i )
- for ( unsigned int j = 0; j < matrix.size2(); ++j )
- matrix ( i,j ) = random<NumericT>();
-
-
- std::cout << "----- Alignment " << Alignment << " -----" << std::endl;
-
- viennacl::vector<NumericT,Alignment> vcl_rhs ( rhs.size() );
- viennacl::vector<NumericT,Alignment> vcl_rhs_trans ( rhs_trans.size() );
- viennacl::vector<NumericT,Alignment> vcl_result_trans ( result_trans.size() );
- viennacl::vector<NumericT,Alignment> vcl_result ( result.size() );
- viennacl::matrix<NumericT, F, Alignment> vcl_matrix ( rhs.size(), rhs.size() );
-
- viennacl::copy ( rhs.begin(), rhs.end(), vcl_rhs.begin() );
- viennacl::copy ( result, vcl_result );
- viennacl::copy ( matrix, vcl_matrix );
-
- // --------------------------------------------------------------------------
-
- viennacl::generator::symbolic_matrix<1,NumericT,F,Alignment> symm2;
-
- viennacl::generator::symbolic_vector<0,NumericT,Alignment> symv;
- viennacl::generator::symbolic_vector<2,NumericT,Alignment> symv3;
-
- // --------------------------------------------------------------------------
- std::cout << "matrix-vector product (no temporary)" << std::endl;
- result = ublas::prod ( matrix, rhs );
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symv = prod ( symm2,symv3 ) ) ( vcl_result,vcl_matrix,vcl_rhs ) );
- if ( fabs ( diff ( result, vcl_result ) ) > epsilon ) {
- std::cout << "# Error at operation: matrix-vector product (no temporary)" << std::endl;
- std::cout << " diff: " << fabs ( diff ( result, vcl_result ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "Prod times inprod (no temporary)" << std::endl;
- result = ublas::inner_prod ( rhs,rhs ) *ublas::prod ( matrix, rhs );
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symv = inner_prod ( symv3,symv3 ) *prod ( symm2,symv3 ) ) ( vcl_result,vcl_matrix,vcl_rhs ) );
-
- if ( fabs ( diff ( result, vcl_result ) ) > epsilon ) {
- std::cout << "# Error at operation: Prod times inprod" << std::endl;
- std::cout << " diff: " << fabs ( diff ( result, vcl_result ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "matrix-vector product (temporary)" << std::endl;
- result = ublas::prod ( matrix, result );
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symv = prod ( symm2,symv ) ) ( vcl_result,vcl_matrix,vcl_rhs ) );
-
- if ( fabs ( diff ( result, vcl_result ) ) > epsilon ) {
- std::cout << "# Error at operation: matrix-vector product (temporary)" << std::endl;
- std::cout << " diff: " << fabs ( diff ( result, vcl_result ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-//
- //--------------------------------------------------------------------------
- std::cout << "prod minus ( v minus prod ) " << std::endl;
- result = ublas::prod ( matrix, rhs ) - ( rhs - ublas::prod ( matrix,rhs ) ) ;
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symv = prod ( symm2,symv3 ) - ( symv3 - prod ( symm2,symv3 ) ) ) ( vcl_result,vcl_matrix,vcl_rhs ) );
- if ( fabs ( diff ( result, vcl_result ) ) > epsilon ) {
- std::cout << "# Error at operation: prod minus ( v minus prod )" << std::endl;
- std::cout << " diff: " << fabs ( diff ( result, vcl_result ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-//
- //--------------------------------------------------------------------------
- std::cout << "prod minus ( prod minus v ) " << std::endl;
- result = ublas::prod ( matrix, rhs ) - ( ublas::prod ( matrix,rhs ) - rhs ) ;
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symv = prod ( symm2,symv3 ) - ( prod ( symm2,symv3 ) - symv3 ) ) ( vcl_result,vcl_matrix,vcl_rhs ) );
- if ( fabs ( diff ( result, vcl_result ) ) > epsilon ) {
- std::cout << "# Error at operation: prod minus ( prod minus v )" << std::endl;
- std::cout << " diff: " << fabs ( diff ( result, vcl_result ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- //--------------------------------------------------------------------------
- std::cout << "v minus ( prod minus v ) " << std::endl;
- result = rhs - ( ublas::prod ( matrix,rhs ) - rhs ) ;
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symv = symv3 - ( prod ( symm2,symv3 ) - symv3 ) ) ( vcl_result,vcl_matrix,vcl_rhs ) );
- if ( fabs ( diff ( result, vcl_result ) ) > epsilon ) {
- std::cout << "# Error at operation: v minus ( prod minus v )" << std::endl;
- std::cout << " diff: " << fabs ( diff ( result, vcl_result ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- //--------------------------------------------------------------------------
- std::cout << "v minus ( v minus prod ) " << std::endl;
- result = rhs - ( rhs - ublas::prod ( matrix,rhs ) ) ;
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symv = symv3 - ( symv3 - prod ( symm2,symv3 ) ) ) ( vcl_result,vcl_matrix,vcl_rhs ) );
- if ( fabs ( diff ( result, vcl_result ) ) > epsilon ) {
- std::cout << "# Error at operation: v minus ( v minus prod )" << std::endl;
- std::cout << " diff: " << fabs ( diff ( result, vcl_result ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- //--------------------------------------------------------------------------
- std::cout << "Nested matrix-vector product" << std::endl;
- result = ublas::prod ( matrix, ublas::vector<NumericT> ( ublas::prod ( matrix,result ) ) );
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symv = prod ( symm2,prod ( symm2,symv ) ) ) ( vcl_result,vcl_matrix ) );
- if ( fabs ( diff ( result, vcl_result ) ) > epsilon ) {
- std::cout << "# Error at operation: nested matrix-vector product" << std::endl;
- std::cout << " diff: " << fabs ( diff ( result, vcl_result ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
-
-// --------------------------------------------------------------------------
- std::cout << "Double nested matrix-vector product" << std::endl;
- result = ublas::prod ( matrix,ublas::vector<NumericT> ( ublas::prod ( matrix, ublas::vector<NumericT> ( ublas::prod ( matrix,rhs ) ) ) ) );
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symv = prod ( symm2,prod ( symm2,prod ( symm2,symv3 ) ) ) ) ( vcl_result,vcl_matrix,vcl_rhs ) );
- if ( fabs ( diff ( result, vcl_result ) ) > epsilon ) {
- std::cout << "# Error at operation: double nested matrix-vector product" << std::endl;
- std::cout << " diff: " << fabs ( diff ( result, vcl_result ) ) << std::endl;
- retval = EXIT_FAILURE;
-
- }
-
- /*std::cout << "Complicated mess..." << std::endl;
- result = result + ublas::prod ( matrix,result ) + ublas::inner_prod ( result, rhs ) *ublas::prod ( matrix,rhs );
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symv = symv + prod ( symm2,symv ) + inner_prod ( symv,symv3 ) *prod ( symm2,symv3 ) ) ( vcl_result,vcl_matrix,vcl_rhs ) );
- if ( fabs ( diff ( result, vcl_result ) ) > epsilon ) {
- std::cout << "# Error at operation : complicated mess" << std::endl;
- std::cout << " diff: " << fabs ( diff ( result, vcl_result ) ) << std::endl;
- retval = EXIT_FAILURE;
- }*/
-
- return retval;
-}
-int main() {
- std::cout << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << "## Test :: Matrix-Vector Product" << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << std::endl;
-
- int retval = EXIT_SUCCESS;
-
- std::cout << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << std::endl;
- {
- typedef float NumericT;
- NumericT epsilon = 1.0E-3;
- std::cout << "# Testing setup:" << std::endl;
- std::cout << " eps: " << epsilon << std::endl;
- std::cout << " numeric: float" << std::endl;
-
- std::cout << "---- Layout : Row Major" << std::endl;
- retval = test<NumericT, viennacl::row_major,ublas::row_major,1> ( epsilon );
- retval = test<NumericT, viennacl::row_major,ublas::row_major,16> ( epsilon );
-
- std::cout << "---- Layout : Column Major" << std::endl;
- retval = test<NumericT, viennacl::column_major,ublas::column_major,1> ( epsilon );
-// retval = test<NumericT, viennacl::column_major,ublas::column_major,16> ( epsilon );
-
- if ( retval == EXIT_SUCCESS )
- std::cout << "# Test passed" << std::endl;
- else
- return retval;
- }
-
- return retval;
-}
diff --git a/tests/src/generator_vector.cpp b/tests/src/generator_vector.cpp
deleted file mode 100644
index 2cf3cab..0000000
--- a/tests/src/generator_vector.cpp
+++ /dev/null
@@ -1,331 +0,0 @@
-//
-// *** System
-//
-#include <iostream>
-
-//
-// *** Boost
-//
-#include <boost/numeric/ublas/io.hpp>
-#include <boost/numeric/ublas/vector.hpp>
-#include <boost/foreach.hpp>
-
-//
-// *** ViennaCL
-//
-//#define VIENNACL_DEBUG_ALL
-#define VIENNACL_HAVE_UBLAS 1
-#define VIENNACL_DEBUG_CUSTOM_OPERATION
-#include "viennacl/vector.hpp"
-#include "viennacl/linalg/inner_prod.hpp"
-#include "viennacl/linalg/norm_1.hpp"
-#include "viennacl/linalg/norm_2.hpp"
-#include "viennacl/linalg/norm_inf.hpp"
-#include "viennacl/generator/custom_operation.hpp"
-#include "viennacl/generator/elementwise_modifier.hpp"
-#include "viennacl/generator/symbolic_types/convenience_typedef.hpp"
-
-using namespace boost::numeric;
-using namespace viennacl::generator;
-
-std::string my_modifier(){ return "1/exp(-X)" ; }
-
-template <class TYPE>
-bool readVectorFromFile ( const std::string & filename, boost::numeric::ublas::vector<TYPE> & vec ) {
- std::ifstream file ( filename.c_str() );
-
- if ( !file ) return false;
-
- unsigned int size;
- file >> size;
-
- if ( size > 20000 ) //keep execution times short
- size = 20000;
- vec.resize ( size );
-
- for ( unsigned int i = 0; i < size; ++i ) {
- TYPE element;
- file >> element;
- vec[i] = element;
- }
-
- return true;
-}
-
-template <typename ScalarType, unsigned int Alignment>
-ScalarType diff ( ublas::vector<ScalarType> & v1, viennacl::vector<ScalarType,Alignment> & v2 ) {
- ublas::vector<ScalarType> v2_cpu ( v2.size() );
- viennacl::copy( v2.begin(), v2.end(), v2_cpu.begin() );
- for ( unsigned int i=0; i<v1.size(); ++i ) {
- if ( std::max ( fabs ( v2_cpu[i] ), fabs ( v1[i] ) ) > 0 )
- v2_cpu[i] = fabs ( v2_cpu[i] - v1[i] ) / std::max ( fabs ( v2_cpu[i] ), fabs ( v1[i] ) );
- else
- v2_cpu[i] = 0.0;
- }
- return norm_inf ( v2_cpu );
-}
-
-template< typename NumericT, unsigned int Alignment, typename Epsilon >
-int test ( Epsilon const& epsilon, std::string vecfile, std::string resultfile ) {
- int retval = EXIT_SUCCESS;
-
-
- ublas::vector<NumericT> vec;
- ublas::vector<NumericT> vec2;
-
- NumericT cpu_scal = static_cast<NumericT> ( 42.1415 );
- viennacl::scalar<NumericT> gpu_scal = static_cast<NumericT> ( 42.1415 );
-
- viennacl::generator::symbolic_vector<0,NumericT,Alignment> symv;
- viennacl::generator::symbolic_vector<1,NumericT,Alignment> symv2;
-
- viennacl::generator::cpu_symbolic_scalar<1,NumericT> cpu_sym_scal2;
- viennacl::generator::gpu_symbolic_scalar<1,NumericT> gpu_sym_scal2;
-
- viennacl::generator::cpu_symbolic_scalar<2,NumericT> cpu_sym_scal3;
- viennacl::generator::gpu_symbolic_scalar<2,NumericT> gpu_sym_scal3;
-
-
- if ( !readVectorFromFile<NumericT> ( vecfile, vec ) ) {
- std::cout << "Error reading vec file" << std::endl;
- retval = EXIT_FAILURE;
- }
-
-
- std::cout << "Running tests for vector of size " << vec.size() << std::endl;
- std::cout << "----- Alignment " << Alignment << " -----" << std::endl;
-
- viennacl::vector<NumericT,Alignment> vcl_vec ( vec.size() );
- viennacl::vector<NumericT,Alignment> vcl_vec2 ( vec.size() );
-
- vec2 = vec;
- viennacl::copy ( vec.begin(), vec.end(), vcl_vec.begin() );
- viennacl::copy ( vec2.begin(), vec2.end(), vcl_vec2.begin() );
-
- // --------------------------------------------------------------------------
-
-
- std::cout << "testing addition..." << std::endl;
- vec = ( vec - vec2 );
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symv = symv - symv2 ) ( vcl_vec, vcl_vec2 ) );
- if ( fabs ( diff ( vec, vcl_vec ) ) > epsilon ) {
- std::cout << "# Error at operation: addition" << std::endl;
- std::cout << " diff: " << fabs ( diff ( vec, vcl_vec ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "Testing inplace addition..." << std::endl;
- vec += vec2;
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symv += symv2 ) ( vcl_vec, vcl_vec2 ) );
- if ( fabs ( diff ( vec, vcl_vec ) ) > epsilon ) {
- std::cout << "# Error at operation: inplace addition" << std::endl;
- std::cout << " diff: " << fabs ( diff ( vec, vcl_vec ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "testing substraction..." << std::endl;
- vec = vec - vec2;
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symv = symv - symv2 ) ( vcl_vec, vcl_vec2 ) );
- if ( fabs ( diff ( vec, vcl_vec ) ) > epsilon ) {
- std::cout << "# Error at operation: substraction" << std::endl;
- std::cout << " diff: " << fabs ( diff ( vec, vcl_vec ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "Testing inplace substraction..." << std::endl;
- vec -= vec2;
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symv -= symv2 ) ( vcl_vec, vcl_vec2 ) );
- if ( fabs ( diff ( vec, vcl_vec ) ) > epsilon ) {
- std::cout << "# Error at operation: inplace addition" << std::endl;
- std::cout << " diff: " << fabs ( diff ( vec, vcl_vec ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- // --------------------------------------------------------------------------
-
- std::cout << "testing cpu scalar multiplication ..." << std::endl;
- vec = vec*cpu_scal;
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symv = symv*cpu_sym_scal2 ) ( vcl_vec, cpu_scal ) );
- if ( fabs ( diff ( vec, vcl_vec ) ) > epsilon ) {
- std::cout << "# Error at operation: cpu scalar multiplication " << std::endl;
- std::cout << " diff: " << fabs ( diff ( vec, vcl_vec ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "testing inplace cpu scalar multiplication ..." << std::endl;
- vec *= cpu_scal;
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symv *= cpu_sym_scal2 ) ( vcl_vec, cpu_scal ) );
- if ( fabs ( diff ( vec, vcl_vec ) ) > epsilon ) {
- std::cout << "# Error at operation: inplace cpu scalar multiplication " << std::endl;
- std::cout << " diff: " << fabs ( diff ( vec, vcl_vec ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "testing cpu scalar division ..." << std::endl;
- vec = vec/cpu_scal;
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symv = symv/cpu_sym_scal2 ) ( vcl_vec, cpu_scal ) );
- if ( fabs ( diff ( vec, vcl_vec ) ) > epsilon ) {
- std::cout << "# Error at operation: cpu scalar division " << std::endl;
- std::cout << " diff: " << fabs ( diff ( vec, vcl_vec ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "testing inplace cpu scalar division ..." << std::endl;
- vec /= cpu_scal;
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symv /= cpu_sym_scal2 ) ( vcl_vec, cpu_scal ) );
- if ( fabs ( diff ( vec, vcl_vec ) ) > epsilon ) {
- std::cout << "# Error at operation: inplace cpu scalar division " << std::endl;
- std::cout << " diff: " << fabs ( diff ( vec, vcl_vec ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "testing gpu scalar multiplication ..." << std::endl;
- vec = vec*cpu_scal;
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symv = symv*gpu_sym_scal2 ) ( vcl_vec, gpu_scal ) );
- if ( fabs ( diff ( vec, vcl_vec ) ) > epsilon ) {
- std::cout << "# Error at operation: gpu scalar multiplication " << std::endl;
- std::cout << " diff: " << fabs ( diff ( vec, vcl_vec ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "testing cpu and gpu scalar multiplication ..." << std::endl;
- vec = cpu_scal*vec*cpu_scal;
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symv = cpu_sym_scal2*symv*gpu_sym_scal3 ) ( vcl_vec, cpu_scal, gpu_scal ) );
- if ( fabs ( diff ( vec, vcl_vec ) ) > epsilon ) {
- std::cout << "# Error at operation: cpu and gpu scalar multiplication " << std::endl;
- std::cout << " diff: " << fabs ( diff ( vec, vcl_vec ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "testing inplace gpu scalar multiplication ..." << std::endl;
- vec *= cpu_scal;
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symv *= gpu_sym_scal2 ) ( vcl_vec, gpu_scal ) );
- if ( fabs ( diff ( vec, vcl_vec ) ) > epsilon ) {
- std::cout << "# Error at operation: inplace gpu scalar multiplication " << std::endl;
- std::cout << " diff: " << fabs ( diff ( vec, vcl_vec ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "testing gpu scalar division ..." << std::endl;
- vec = vec/cpu_scal;
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symv = symv/gpu_sym_scal2 ) ( vcl_vec, gpu_scal ) );
- if ( fabs ( diff ( vec, vcl_vec ) ) > epsilon ) {
- std::cout << "# Error at operation: gpu scalar division " << std::endl;
- std::cout << " diff: " << fabs ( diff ( vec, vcl_vec ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "testing inplace gpu scalar division ..." << std::endl;
- vec /=cpu_scal;
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symv /= gpu_sym_scal2 ) ( vcl_vec, gpu_scal ) );
- if ( fabs ( diff ( vec, vcl_vec ) ) > epsilon ) {
- std::cout << "# Error at operation: inplace gpu scalar division " << std::endl;
- std::cout << " diff: " << fabs ( diff ( vec, vcl_vec ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- // --------------------------------------------------------------------------
-
- std::cout << "testing addition scalar multiplication..." << std::endl;
- vec = vec + cpu_scal*vec2;
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symv = symv + cpu_sym_scal3*symv2 ) ( vcl_vec, vcl_vec2, cpu_scal ) );
- if ( fabs ( diff ( vec, vcl_vec ) ) > epsilon ) {
- std::cout << "# Error at operation: addition scalar multiplication" << std::endl;
- std::cout << " diff: " << fabs ( diff ( vec, vcl_vec ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
-
- // --------------------------------------------------------------------------
- std::cout << "testing multiple addition..." << std::endl;
- vec = vec + vec2 + vec;
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symv = symv + symv2 + symv ) ( vcl_vec, vcl_vec2 ) );
- if ( fabs ( diff ( vec, vcl_vec ) ) > epsilon ) {
- std::cout << "# Error at operation: multiple addition" << std::endl;
- std::cout << " diff: " << fabs ( diff ( vec, vcl_vec ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "testing substraction with parenthesis" << std::endl;
- vec = vec - ( vec2 - vec );
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symv = symv - ( symv2 - symv ) ) ( vcl_vec, vcl_vec2 ) );
- if ( fabs ( diff ( vec, vcl_vec ) ) > epsilon ) {
- std::cout << "# Error at operation: substraction with parenthesis" << std::endl;
- std::cout << " diff: " << fabs ( diff ( vec, vcl_vec ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "testing tree expansion right minus" << std::endl;
- vec = vec + cpu_scal* ( vec2 - vec );
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symv = symv + cpu_sym_scal3* ( symv2 - symv ) ) ( vcl_vec, vcl_vec2,cpu_scal ) );
- if ( fabs ( diff ( vec, vcl_vec ) ) > epsilon ) {
- std::cout << "# Error at operation: tree expansion right minus" << std::endl;
- std::cout << " diff: " << fabs ( diff ( vec, vcl_vec ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "testing tree expansion right plus" << std::endl;
- vec = vec + cpu_scal* ( vec2 + vec );
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symv = symv + cpu_sym_scal3* ( symv2 + symv ) ) ( vcl_vec, vcl_vec2,cpu_scal ) );
- if ( fabs ( diff ( vec, vcl_vec ) ) > epsilon ) {
- std::cout << "# Error at operation: tree expansion right plus" << std::endl;
- std::cout << " diff: " << fabs ( diff ( vec, vcl_vec ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "testing tree expansion left minus" << std::endl;
- vec = vec + ( vec2 - vec ) *cpu_scal;
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symv = symv + ( symv2 - symv ) *cpu_sym_scal3 ) ( vcl_vec, vcl_vec2,cpu_scal ) );
- if ( fabs ( diff ( vec, vcl_vec ) ) > epsilon ) {
- std::cout << "# Error at operation: tree expansion left minus" << std::endl;
- std::cout << " diff: " << fabs ( diff ( vec, vcl_vec ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "testing tree expansion left plus" << std::endl;
- vec = vec + ( vec2 + vec ) *cpu_scal;
- viennacl::ocl::enqueue ( viennacl::generator::custom_operation ( symv = symv + ( symv2 + symv ) *cpu_sym_scal3 ) ( vcl_vec, vcl_vec2,cpu_scal ) );
- if ( fabs ( diff ( vec, vcl_vec ) ) > epsilon ) {
- std::cout << "# Error at operation: tree expansion left plus" << std::endl;
- std::cout << " diff: " << fabs ( diff ( vec, vcl_vec ) ) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- return retval;
-}
-
-
-int main() {
- std::cout << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << "## Test :: Vector" << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << std::endl;
-
- int retval = EXIT_SUCCESS;
-
- std::string vecfile ( "../examples/testdata/rhs65025.txt" );
- std::string resultfile ( "../examples/testdata/result65025.txt" );
-
- std::cout << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << std::endl;
- {
- typedef float NumericT;
- NumericT epsilon = 1.0E-4;
- std::cout << "# Testing setup:" << std::endl;
- std::cout << " eps: " << epsilon << std::endl;
- std::cout << " numeric: float" << std::endl;
- retval = test<NumericT,1> ( epsilon, vecfile, resultfile );
-// retval &= test<NumericT,2> ( epsilon, vecfile, resultfile );
- retval &= test<NumericT,4> ( epsilon, vecfile, resultfile );
-// retval &= test<NumericT,8> ( epsilon, vecfile, resultfile );
- retval &= test<NumericT,16> ( epsilon, vecfile, resultfile );
- if ( retval == EXIT_SUCCESS )
- std::cout << "# Test passed" << std::endl;
- else
- return retval;
- }
-}
diff --git a/tests/src/matrix.cpp b/tests/src/matrix.cpp
deleted file mode 100644
index 0108760..0000000
--- a/tests/src/matrix.cpp
+++ /dev/null
@@ -1,533 +0,0 @@
-/* =========================================================================
- Copyright (c) 2010-2011, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp at iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-
-//
-// *** System
-//
-#include <iostream>
-
-//
-// *** Boost
-//
-#include <boost/numeric/ublas/io.hpp>
-#include <boost/numeric/ublas/triangular.hpp>
-#include <boost/numeric/ublas/matrix_sparse.hpp>
-#include <boost/numeric/ublas/matrix.hpp>
-#include <boost/numeric/ublas/matrix_proxy.hpp>
-#include <boost/numeric/ublas/lu.hpp>
-#include <boost/numeric/ublas/io.hpp>
-
-//
-// *** ViennaCL
-//
-//#define VIENNACL_DEBUG_ALL
-#define VIENNACL_HAVE_UBLAS 1
-#include "viennacl/scalar.hpp"
-#include "viennacl/matrix.hpp"
-#include "viennacl/vector.hpp"
-#include "viennacl/linalg/prod.hpp"
-#include "viennacl/linalg/norm_2.hpp"
-#include "viennacl/linalg/direct_solve.hpp"
-#include "examples/tutorial/Random.hpp"
-
-//
-// -------------------------------------------------------------
-//
-using namespace boost::numeric;
-//
-// -------------------------------------------------------------
-//
-template <typename ScalarType>
-ScalarType diff(ScalarType & s1, viennacl::scalar<ScalarType> & s2)
-{
- if (s1 != s2)
- return (s1 - s2) / std::max(fabs(s1), fabs(s2));
- return 0;
-}
-
-template <typename ScalarType>
-ScalarType diff(ublas::vector<ScalarType> & v1, viennacl::vector<ScalarType> & v2)
-{
- ublas::vector<ScalarType> v2_cpu(v2.size());
- copy(v2.begin(), v2.end(), v2_cpu.begin());
-
- for (unsigned int i=0;i<v1.size(); ++i)
- {
- if ( std::max( fabs(v2_cpu[i]), fabs(v1[i]) ) > 0 )
- v2_cpu[i] = fabs(v2_cpu[i] - v1[i]) / std::max( fabs(v2_cpu[i]), fabs(v1[i]) );
- else
- v2_cpu[i] = 0.0;
- }
-
- return norm_inf(v2_cpu);
-}
-
-template <typename ScalarType, typename F, unsigned int ALIGNMENT>
-ScalarType diff(ublas::matrix<ScalarType> & mat1, viennacl::matrix<ScalarType, F, ALIGNMENT> & mat2)
-{
- ublas::matrix<ScalarType> mat2_cpu(mat2.size1(), mat2.size2());
- copy(mat2, mat2_cpu);
- ScalarType ret = 0;
- ScalarType act = 0;
-
- for (unsigned int i = 0; i < mat2_cpu.size1(); ++i)
- {
- for (unsigned int j = 0; j < mat2_cpu.size2(); ++j)
- {
- act = fabs(mat2_cpu(i,j) - mat1(i,j)) / std::max( fabs(mat2_cpu(i, j)), fabs(mat1(i,j)) );
- if (act > ret)
- ret = act;
- }
- }
- //std::cout << ret << std::endl;
- return ret;
-}
-
-//
-// -------------------------------------------------------------
-//
-template< typename NumericT, typename F, typename Epsilon >
-int test(Epsilon const& epsilon)
-{
- int retval = EXIT_SUCCESS;
-
- std::size_t num_rows = 121;
- std::size_t num_cols = 103;
-
- // --------------------------------------------------------------------------
- ublas::vector<NumericT> rhs(num_rows);
- for (unsigned int i = 0; i < rhs.size(); ++i)
- rhs(i) = random<NumericT>();
- ublas::vector<NumericT> rhs2 = rhs;
- ublas::vector<NumericT> result = ublas::scalar_vector<NumericT>(num_cols, NumericT(3.1415));
- ublas::vector<NumericT> result2 = result;
- ublas::vector<NumericT> rhs_trans = rhs;
- rhs_trans.resize(result.size(), true);
- ublas::vector<NumericT> result_trans = ublas::zero_vector<NumericT>(rhs.size());
-
-
- ublas::matrix<NumericT> matrix(result.size(), rhs.size());
-
- for (unsigned int i = 0; i < matrix.size1(); ++i)
- for (unsigned int j = 0; j < matrix.size2(); ++j)
- matrix(i,j) = static_cast<NumericT>(0.1) * random<NumericT>();
-
- viennacl::vector<NumericT> vcl_rhs(rhs.size());
- viennacl::vector<NumericT> vcl_rhs_trans(rhs_trans.size());
- viennacl::vector<NumericT> vcl_result_trans(result_trans.size());
- viennacl::vector<NumericT> vcl_result(result.size());
- viennacl::matrix<NumericT, F> vcl_matrix(result.size(), rhs.size());
-
- viennacl::copy(rhs.begin(), rhs.end(), vcl_rhs.begin());
- viennacl::copy(result, vcl_result);
- viennacl::copy(matrix, vcl_matrix);
-
- std::cout << "Matrix resizing (to larger)" << std::endl;
- matrix.resize(2*num_rows, 2*num_cols, true);
- for (unsigned int i = 0; i < matrix.size1(); ++i)
- {
- for (unsigned int j = (i<result.size() ? rhs.size() : 0); j < matrix.size2(); ++j)
- matrix(i,j) = 0;
- }
- vcl_matrix.resize(2*num_rows, 2*num_cols, true);
- viennacl::copy(vcl_matrix, matrix);
- if( fabs(diff(matrix, vcl_matrix)) > epsilon )
- {
- std::cout << "# Error at operation: matrix resize (to larger)" << std::endl;
- std::cout << " diff: " << fabs(diff(matrix, vcl_matrix)) << std::endl;
- return EXIT_FAILURE;
- }
-
- matrix(12, 14) = NumericT(1.9);
- matrix(19, 16) = NumericT(1.0);
- matrix (13, 15) = NumericT(-9);
- vcl_matrix(12, 14) = NumericT(1.9);
- vcl_matrix(19, 16) = NumericT(1.0);
- vcl_matrix (13, 15) = NumericT(-9);
-
- std::cout << "Matrix resizing (to smaller)" << std::endl;
- matrix.resize(result.size(), rhs.size(), true);
- vcl_matrix.resize(result.size(), rhs.size(), true);
- if( fabs(diff(matrix, vcl_matrix)) > epsilon )
- {
- std::cout << "# Error at operation: matrix resize (to smaller)" << std::endl;
- std::cout << " diff: " << fabs(diff(matrix, vcl_matrix)) << std::endl;
- return EXIT_FAILURE;
- }
-
-
- std::cout << "Matrix addition and subtraction" << std::endl;
- viennacl::matrix<NumericT, F> vcl_matrix2 = vcl_matrix;
- vcl_matrix2 += vcl_matrix;
- vcl_matrix2 -= vcl_matrix;
- vcl_matrix2 = vcl_matrix2 + vcl_matrix;
- vcl_matrix2 = vcl_matrix2 - vcl_matrix;
-
- if( fabs(diff(matrix, vcl_matrix2)) > epsilon )
- {
- std::cout << "# Error at operation: matrix addition and subtraction" << std::endl;
- std::cout << " diff: " << fabs(diff(matrix, vcl_matrix2)) << std::endl;
- return EXIT_FAILURE;
- }
-
- // --------------------------------------------------------------------------
- std::cout << "Rank 1 update" << std::endl;
- ublas::matrix<NumericT> matrix2 = matrix;
-
- matrix2 += ublas::outer_prod(result, rhs);
- vcl_matrix += viennacl::linalg::outer_prod(vcl_result, vcl_rhs);
- if( fabs(diff(matrix2, vcl_matrix)) > epsilon )
- {
- std::cout << "# Error at operation: rank 1 update" << std::endl;
- std::cout << " diff: " << fabs(diff(matrix2, vcl_matrix)) << std::endl;
- return EXIT_FAILURE;
- }
- // --------------------------------------------------------------------------
- std::cout << "Scaled rank 1 update" << std::endl;
- matrix2 += 4.2f * ublas::outer_prod(result, rhs);
- vcl_matrix += 2.1f * viennacl::linalg::outer_prod(vcl_result, vcl_rhs);
- vcl_matrix += viennacl::linalg::outer_prod(vcl_result, vcl_rhs) * 2.1f; //check proper compilation
- if( fabs(diff(matrix2, vcl_matrix)) > epsilon )
- {
- std::cout << "# Error at operation: scaled rank 1 update" << std::endl;
- std::cout << " diff: " << fabs(diff(matrix2, vcl_matrix)) << std::endl;
- return EXIT_FAILURE;
- }
-
- //reset vcl_matrix:
- viennacl::copy(matrix, vcl_matrix);
-
- // --------------------------------------------------------------------------
- std::cout << "Matrix-Vector product" << std::endl;
- result = viennacl::linalg::prod(matrix, rhs);
- vcl_result = viennacl::linalg::prod(vcl_matrix, vcl_rhs);
-
- for (std::size_t i=0; i<result.size(); ++i)
- {
- std::cout << rhs(i) << ", " << vcl_rhs(i) << ", " << result(i) << ", " << vcl_result(i) << std::endl;
- }
-
- if( fabs(diff(result, vcl_result)) > epsilon )
- {
- std::cout << "# Error at operation: matrix-vector product" << std::endl;
- std::cout << " diff: " << fabs(diff(result, vcl_result)) << std::endl;
- retval = EXIT_FAILURE;
- }
- // --------------------------------------------------------------------------
- std::cout << "Matrix-Vector product with scaled add" << std::endl;
- NumericT alpha = static_cast<NumericT>(2.786);
- NumericT beta = static_cast<NumericT>(1.432);
- viennacl::copy(rhs.begin(), rhs.end(), vcl_rhs.begin());
- viennacl::copy(result.begin(), result.end(), vcl_result.begin());
-
- result = alpha * viennacl::linalg::prod(matrix, rhs) + beta * result;
- vcl_result = alpha * viennacl::linalg::prod(vcl_matrix, vcl_rhs) + beta * vcl_result;
-
- if( fabs(diff(result, vcl_result)) > epsilon )
- {
- std::cout << "# Error at operation: matrix-vector product with scaled additions" << std::endl;
- std::cout << " diff: " << fabs(diff(result, vcl_result)) << std::endl;
- retval = EXIT_FAILURE;
- }
- // --------------------------------------------------------------------------
-
- viennacl::copy(rhs_trans.begin(), rhs_trans.end(), vcl_rhs_trans.begin());
- viennacl::copy(result_trans.begin(), result_trans.end(), vcl_result_trans.begin());
-
- std::cout << "Transposed Matrix-Vector product" << std::endl;
- result_trans = alpha * viennacl::linalg::prod(trans(matrix), rhs_trans);
- vcl_result_trans = alpha * viennacl::linalg::prod(trans(vcl_matrix), vcl_rhs_trans);
-
- if( fabs(diff(result_trans, vcl_result_trans)) > epsilon )
- {
- std::cout << "# Error at operation: transposed matrix-vector product" << std::endl;
- std::cout << " diff: " << fabs(diff(result_trans, vcl_result_trans)) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- std::cout << "Transposed Matrix-Vector product with scaled add" << std::endl;
- result_trans = alpha * viennacl::linalg::prod(trans(matrix), rhs_trans) + beta * result_trans;
- vcl_result_trans = alpha * viennacl::linalg::prod(trans(vcl_matrix), vcl_rhs_trans) + beta * vcl_result_trans;
-
- if( fabs(diff(result_trans, vcl_result_trans)) > epsilon )
- {
- std::cout << "# Error at operation: transposed matrix-vector product with scaled additions" << std::endl;
- std::cout << " diff: " << fabs(diff(result_trans, vcl_result_trans)) << std::endl;
- retval = EXIT_FAILURE;
- }
- // --------------------------------------------------------------------------
-
- /////////////////// test direct solvers ////////////////////////////
-
- rhs.resize(40);
- matrix.resize(rhs.size(), rhs.size());
- result.resize(rhs.size());
-
- std::cout << "Resizing vcl_rhs..." << std::endl;
- vcl_rhs.resize(rhs.size());
- std::cout << "Resizing vcl_rhs done" << std::endl;
- vcl_matrix.resize(rhs.size(), rhs.size());
- std::cout << "Resizing vcl_result..." << std::endl;
- vcl_result.resize(rhs.size());
- std::cout << "Resizing vcl_result done" << std::endl;
-
- for (unsigned int i = 0; i < matrix.size1(); ++i)
- {
- for (unsigned int j = 0; j < matrix.size2(); ++j)
- matrix(i,j) = -random<NumericT>();
- rhs(i) = random<NumericT>();
- }
-
- //force unit diagonal
- for (unsigned int i = 0; i < matrix.size1(); ++i)
- matrix(i,i) = static_cast<NumericT>(3) + random<NumericT>();
-
- viennacl::copy(matrix, vcl_matrix);
- viennacl::copy(rhs, vcl_rhs);
-
- //upper triangular:
- std::cout << "Upper triangular solver" << std::endl;
- result = ublas::solve(matrix, rhs, ublas::upper_tag());
- vcl_result = viennacl::linalg::solve(vcl_matrix, vcl_rhs, viennacl::linalg::upper_tag());
- if( fabs(diff(result, vcl_result)) > epsilon )
- {
- std::cout << "# Error at operation: upper triangular solver" << std::endl;
- std::cout << " diff: " << fabs(diff(result, vcl_result)) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- //upper unit triangular:
- std::cout << "Upper unit triangular solver" << std::endl;
- viennacl::copy(rhs, vcl_rhs);
- result = ublas::solve(matrix, rhs, ublas::unit_upper_tag());
- vcl_result = viennacl::linalg::solve(vcl_matrix, vcl_rhs, viennacl::linalg::unit_upper_tag());
- if( fabs(diff(result, vcl_result)) > epsilon )
- {
- std::cout << "# Error at operation: unit upper triangular solver" << std::endl;
- std::cout << " diff: " << fabs(diff(result, vcl_result)) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- //lower triangular:
- std::cout << "Lower triangular solver" << std::endl;
- viennacl::copy(rhs, vcl_rhs);
- result = ublas::solve(matrix, rhs, ublas::lower_tag());
- vcl_result = viennacl::linalg::solve(vcl_matrix, vcl_rhs, viennacl::linalg::lower_tag());
- if( fabs(diff(result, vcl_result)) > epsilon )
- {
- std::cout << "# Error at operation: lower triangular solver" << std::endl;
- std::cout << " diff: " << fabs(diff(result, vcl_result)) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- //lower unit triangular:
- std::cout << "Lower unit triangular solver" << std::endl;
- viennacl::copy(rhs, vcl_rhs);
- result = ublas::solve(matrix, rhs, ublas::unit_lower_tag());
- vcl_result = viennacl::linalg::solve(vcl_matrix, vcl_rhs, viennacl::linalg::unit_lower_tag());
- if( fabs(diff(result, vcl_result)) > epsilon )
- {
- std::cout << "# Error at operation: unit lower triangular solver" << std::endl;
- std::cout << " diff: " << fabs(diff(result, vcl_result)) << std::endl;
- retval = EXIT_FAILURE;
- }
-
-
-
-
-
- //transposed upper triangular:
- std::cout << "Transposed upper triangular solver" << std::endl;
- viennacl::copy(rhs, vcl_rhs);
- result = ublas::solve(trans(matrix), rhs, ublas::upper_tag());
- vcl_result = viennacl::linalg::solve(trans(vcl_matrix), vcl_rhs, viennacl::linalg::upper_tag());
- if( fabs(diff(result, vcl_result)) > epsilon )
- {
- std::cout << "# Error at operation: upper triangular solver" << std::endl;
- std::cout << " diff: " << fabs(diff(result, vcl_result)) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- //transposed upper unit triangular:
- std::cout << "Transposed unit upper triangular solver" << std::endl;
- viennacl::copy(rhs, vcl_rhs);
- result = ublas::solve(trans(matrix), rhs, ublas::unit_upper_tag());
- vcl_result = viennacl::linalg::solve(trans(vcl_matrix), vcl_rhs, viennacl::linalg::unit_upper_tag());
- if( fabs(diff(result, vcl_result)) > epsilon )
- {
- std::cout << "# Error at operation: unit upper triangular solver" << std::endl;
- std::cout << " diff: " << fabs(diff(result, vcl_result)) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- //transposed lower triangular:
- std::cout << "Transposed lower triangular solver" << std::endl;
- viennacl::copy(rhs, vcl_rhs);
- result = ublas::solve(trans(matrix), rhs, ublas::lower_tag());
- vcl_result = viennacl::linalg::solve(trans(vcl_matrix), vcl_rhs, viennacl::linalg::lower_tag());
- if( fabs(diff(result, vcl_result)) > epsilon )
- {
- std::cout << "# Error at operation: lower triangular solver" << std::endl;
- std::cout << " diff: " << fabs(diff(result, vcl_result)) << std::endl;
- retval = EXIT_FAILURE;
- }
-
- //transposed lower unit triangular:
- std::cout << "Transposed unit lower triangular solver" << std::endl;
- viennacl::copy(rhs, vcl_rhs);
- result = ublas::solve(trans(matrix), rhs, ublas::unit_lower_tag());
- vcl_result = viennacl::linalg::solve(trans(vcl_matrix), vcl_rhs, viennacl::linalg::unit_lower_tag());
- if( fabs(diff(result, vcl_result)) > epsilon )
- {
- std::cout << "# Error at operation: unit lower triangular solver" << std::endl;
- std::cout << " diff: " << fabs(diff(result, vcl_result)) << std::endl;
- retval = EXIT_FAILURE;
- }
-
-
- //full solver:
- std::cout << "Full solver" << std::endl;
- unsigned int lu_dim = 100;
- ublas::matrix<NumericT> square_matrix(lu_dim, lu_dim);
- ublas::vector<NumericT> lu_rhs(lu_dim);
- viennacl::matrix<NumericT, F> vcl_square_matrix(lu_dim, lu_dim);
- viennacl::vector<NumericT> vcl_lu_rhs(lu_dim);
-
- for (std::size_t i=0; i<lu_dim; ++i)
- for (std::size_t j=0; j<lu_dim; ++j)
- square_matrix(i,j) = -static_cast<NumericT>(0.5) * random<NumericT>();
-
- //put some more weight on diagonal elements:
- for (std::size_t j=0; j<lu_dim; ++j)
- {
- square_matrix(j,j) = static_cast<NumericT>(20.0) + random<NumericT>();
- lu_rhs(j) = random<NumericT>();
- }
-
- viennacl::copy(square_matrix, vcl_square_matrix);
- viennacl::copy(lu_rhs, vcl_lu_rhs);
-
- //ublas::
- ublas::lu_factorize(square_matrix);
- ublas::inplace_solve (square_matrix, lu_rhs, ublas::unit_lower_tag ());
- ublas::inplace_solve (square_matrix, lu_rhs, ublas::upper_tag ());
-
- // ViennaCL:
- viennacl::linalg::lu_factorize(vcl_square_matrix);
- //viennacl::copy(square_matrix, vcl_square_matrix);
- viennacl::linalg::lu_substitute(vcl_square_matrix, vcl_lu_rhs);
-
- if( fabs(diff(lu_rhs, vcl_lu_rhs)) > epsilon )
- {
- std::cout << "# Error at operation: dense solver" << std::endl;
- std::cout << " diff: " << fabs(diff(lu_rhs, vcl_lu_rhs)) << std::endl;
- retval = EXIT_FAILURE;
- }
-
-
-
- return retval;
-}
-//
-// -------------------------------------------------------------
-//
-int main()
-{
- std::cout << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << "## Test :: Matrix" << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << std::endl;
-
- int retval = EXIT_SUCCESS;
-
- std::cout << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << std::endl;
- {
- typedef float NumericT;
- NumericT epsilon = NumericT(1.0E-3);
- std::cout << "# Testing setup:" << std::endl;
- std::cout << " eps: " << epsilon << std::endl;
- std::cout << " numeric: float" << std::endl;
- std::cout << " layout: row-major" << std::endl;
- retval = test<NumericT, viennacl::row_major>(epsilon);
- if( retval == EXIT_SUCCESS )
- std::cout << "# Test passed" << std::endl;
- else
- return retval;
- }
- std::cout << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << std::endl;
- {
- typedef float NumericT;
- NumericT epsilon = NumericT(1.0E-3);
- std::cout << "# Testing setup:" << std::endl;
- std::cout << " eps: " << epsilon << std::endl;
- std::cout << " numeric: float" << std::endl;
- std::cout << " layout: column-major" << std::endl;
- retval = test<NumericT, viennacl::column_major>(epsilon);
- if( retval == EXIT_SUCCESS )
- std::cout << "# Test passed" << std::endl;
- else
- return retval;
- }
- std::cout << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << std::endl;
-
-
- if( viennacl::ocl::current_device().double_support() )
- {
- {
- typedef double NumericT;
- NumericT epsilon = 1.0E-11;
- std::cout << "# Testing setup:" << std::endl;
- std::cout << " eps: " << epsilon << std::endl;
- std::cout << " numeric: double" << std::endl;
- std::cout << " layout: row-major" << std::endl;
- retval = test<NumericT, viennacl::row_major>(epsilon);
- if( retval == EXIT_SUCCESS )
- std::cout << "# Test passed" << std::endl;
- else
- return retval;
- }
- std::cout << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << std::endl;
- {
- typedef double NumericT;
- NumericT epsilon = 1.0E-11;
- std::cout << "# Testing setup:" << std::endl;
- std::cout << " eps: " << epsilon << std::endl;
- std::cout << " numeric: double" << std::endl;
- std::cout << " layout: column-major" << std::endl;
- retval = test<NumericT, viennacl::column_major>(epsilon);
- if( retval == EXIT_SUCCESS )
- std::cout << "# Test passed" << std::endl;
- else
- return retval;
- }
- std::cout << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << std::endl;
- }
- return retval;
-}
diff --git a/tests/src/matrix_range.cpp b/tests/src/matrix_range.cpp
deleted file mode 100644
index c53470b..0000000
--- a/tests/src/matrix_range.cpp
+++ /dev/null
@@ -1,558 +0,0 @@
-/* =========================================================================
- Copyright (c) 2010-2011, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp at iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-#define VIENNACL_HAVE_UBLAS
-//#define NDEBUG
-//#define VIENNACL_BUILD_INFO
-
-#include <utility>
-#include <iostream>
-#include <fstream>
-#include <string>
-#include <cmath>
-#include <algorithm>
-#include <stdio.h>
-#include <time.h>
-//#include "../benchmarks/benchmark-utils.hpp"
-#include "viennacl/scalar.hpp"
-#include "viennacl/matrix.hpp"
-#include "viennacl/linalg/prod.hpp"
-/*#include "viennacl/compressed_matrix.hpp"
-#include "viennacl/linalg/cg.hpp"
-#include "viennacl/linalg/inner_prod.hpp"
-#include "viennacl/linalg/ilu.hpp"
-#include "viennacl/linalg/norm_2.hpp"
-#include "viennacl/io/matrix_market.hpp"*/
-#include "viennacl/matrix_proxy.hpp"
-#include "viennacl/vector_proxy.hpp"
-#include "boost/numeric/ublas/vector.hpp"
-#include "boost/numeric/ublas/matrix.hpp"
-#include "boost/numeric/ublas/matrix_proxy.hpp"
-#include "boost/numeric/ublas/vector_proxy.hpp"
-#include "boost/numeric/ublas/io.hpp"
-
-
-template <typename VectorType, typename VCLVectorType>
-bool check_for_equality_vector(VectorType const & ublas_v, VCLVectorType const & vcl_v)
-{
- typedef typename VectorType::value_type value_type;
-
- boost::numeric::ublas::vector<value_type> vcl_v_cpu(vcl_v.size());
- viennacl::copy(vcl_v, vcl_v_cpu);
-
- for (std::size_t i=0; i<ublas_v.size(); ++i)
- {
- if (ublas_v(i) != vcl_v_cpu(i))
- {
- if ( std::abs(ublas_v(i) - vcl_v_cpu(i)) / std::max(ublas_v(i), vcl_v_cpu(i)) > 1e-5 )
- {
- std::cout << "Error at index (" << i << "): " << ublas_v(i) << " vs " << vcl_v_cpu(i) << std::endl;
- std::cout << ublas_v << std::endl;
- std::cout << vcl_v_cpu << std::endl;
- return false;
- }
- }
- }
- return true;
-}
-
-
-template <typename MatrixType, typename VCLMatrixType>
-bool check_for_equality(MatrixType const & ublas_A, VCLMatrixType const & vcl_A)
-{
- typedef typename MatrixType::value_type value_type;
-
- boost::numeric::ublas::matrix<value_type> vcl_A_cpu(vcl_A.size1(), vcl_A.size2());
- viennacl::copy(vcl_A, vcl_A_cpu);
-
- for (std::size_t i=0; i<ublas_A.size1(); ++i)
- {
- for (std::size_t j=0; j<ublas_A.size2(); ++j)
- {
- if (ublas_A(i,j) != vcl_A_cpu(i,j))
- {
- if ( std::abs(ublas_A(i,j) - vcl_A_cpu(i,j)) / std::max(ublas_A(i,j), vcl_A_cpu(i,j)) > 1e-5 )
- {
- std::cout << "Error at index (" << i << ", " << j << "): " << ublas_A(i,j) << " vs " << vcl_A_cpu(i,j) << std::endl;
- std::cout << ublas_A << std::endl;
- std::cout << vcl_A_cpu << std::endl;
- return false;
- }
- }
- }
- }
- return true;
-}
-
-
-
-
-template <typename T, typename ScalarType>
-int run_test()
-{
- //typedef float ScalarType;
- typedef boost::numeric::ublas::matrix<ScalarType> MatrixType;
- typedef boost::numeric::ublas::vector<ScalarType> VectorType;
-
- typedef viennacl::matrix<ScalarType, T> VCLMatrixType;
- typedef viennacl::vector<ScalarType> VCLVectorType;
-
- viennacl::scalar<ScalarType> gpu_pi = ScalarType(3.1415);
-
- std::size_t dim_large = 151;
- std::size_t dim_small = 37;
- //std::size_t dim_large = 35;
- //std::size_t dim_small = 17;
-
- //setup ublas objects:
- MatrixType ublas_A(dim_large, dim_large);
- for (std::size_t i=0; i<ublas_A.size1(); ++i)
- for (std::size_t j=0; j<ublas_A.size2(); ++j)
- ublas_A(i,j) = ScalarType((i+1) + (j+1)*(i+1));
-
- MatrixType ublas_B(dim_small, dim_small);
- for (std::size_t i=0; i<ublas_B.size1(); ++i)
- for (std::size_t j=0; j<ublas_B.size2(); ++j)
- ublas_B(i,j) = ScalarType((i+1) + (j+1)*(i+1));
-
- MatrixType ublas_C(dim_large, dim_small);
- for (std::size_t i=0; i<ublas_C.size1(); ++i)
- for (std::size_t j=0; j<ublas_C.size2(); ++j)
- ublas_C(i,j) = ScalarType((j+2) + (j+1)*(i+1));
-
- MatrixType ublas_D(dim_small, dim_large);
- for (std::size_t i=0; i<ublas_D.size1(); ++i)
- for (std::size_t j=0; j<ublas_D.size2(); ++j)
- ublas_D(i,j) = ScalarType((j+2) + (j+1)*(i+1));
-
- boost::numeric::ublas::range ublas_r1(0, dim_small);
- boost::numeric::ublas::range ublas_r2(dim_large - dim_small, dim_large);
- boost::numeric::ublas::matrix_range<MatrixType> ublas_A_sub1(ublas_A, ublas_r1, ublas_r1);
- boost::numeric::ublas::matrix_range<MatrixType> ublas_A_sub2(ublas_A, ublas_r2, ublas_r2);
-
- boost::numeric::ublas::matrix_range<MatrixType> ublas_C_sub(ublas_C, ublas_r1, ublas_r1);
- boost::numeric::ublas::matrix_range<MatrixType> ublas_D_sub(ublas_D, ublas_r1, ublas_r1);
-
- //Setup ViennaCL objects
- VCLMatrixType vcl_A(dim_large, dim_large);
- viennacl::copy(ublas_A, vcl_A);
- VCLMatrixType vcl_B(dim_small, dim_small);
- viennacl::copy(ublas_B, vcl_B);
- VCLMatrixType vcl_C(dim_large, dim_small);
- viennacl::copy(ublas_C, vcl_C);
- VCLMatrixType vcl_D(dim_small, dim_large);
- viennacl::copy(ublas_D, vcl_D);
-
- viennacl::range vcl_r1(0, dim_small);
- viennacl::range vcl_r2(dim_large - dim_small, dim_large);
- viennacl::matrix_range<VCLMatrixType> vcl_A_sub1(vcl_A, vcl_r1, vcl_r1);
- viennacl::matrix_range<VCLMatrixType> vcl_A_sub2(vcl_A, vcl_r2, vcl_r2);
-
- viennacl::matrix_range<VCLMatrixType> vcl_C_sub(vcl_C, vcl_r1, vcl_r1);
- viennacl::matrix_range<VCLMatrixType> vcl_D_sub(vcl_D, vcl_r1, vcl_r1);
-
- std::cout << std::endl;
- std::cout << "//" << std::endl;
- std::cout << "////////// Test 1: Copy to GPU //////////" << std::endl;
- std::cout << "//" << std::endl;
-
- ublas_A_sub1 = ublas_B;
- viennacl::copy(ublas_B, vcl_A_sub1);
- std::cout << "Testing upper left copy to A... ";
- if (check_for_equality(ublas_A, vcl_A))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
-
- ublas_A_sub2 = ublas_B;
- viennacl::copy(ublas_B, vcl_A_sub2);
- std::cout << "Testing lower right copy to A... ";
- if (check_for_equality(ublas_A, vcl_A))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
-
-
- ublas_C_sub = ublas_B;
- viennacl::copy(ublas_B, vcl_C_sub);
- std::cout << "Testing upper copy to C... ";
- if (check_for_equality(ublas_C, vcl_C))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
-
- ublas_D_sub = ublas_B;
- viennacl::copy(ublas_B, vcl_D_sub);
- std::cout << "Testing left copy to D... ";
- if (check_for_equality(ublas_D, vcl_D))
- std::cout << "PASSED!" << std::endl;
- else
- std::cout << std::endl << "TEST failed!";
-
- std::cout << std::endl;
- std::cout << "//" << std::endl;
- std::cout << "////////// Test 2: Copy from GPU //////////" << std::endl;
- std::cout << "//" << std::endl;
-
- std::cout << "Testing upper left copy to A... ";
- if (check_for_equality(ublas_A_sub1, vcl_A_sub1))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "Testing lower right copy to A... ";
- if (check_for_equality(ublas_A_sub2, vcl_A_sub2))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "Testing upper copy to C... ";
- if (check_for_equality(ublas_C_sub, vcl_C_sub))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "Testing left copy to D... ";
- if (check_for_equality(ublas_D_sub, vcl_D_sub))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
-
- std::cout << "//" << std::endl;
- std::cout << "////////// Test 3: Addition //////////" << std::endl;
- std::cout << "//" << std::endl;
- viennacl::copy(ublas_A_sub2, vcl_A_sub2);
-
- std::cout << "Inplace add to submatrix: ";
- ublas_A_sub2 += ublas_A_sub2;
- vcl_A_sub2 += vcl_A_sub2;
-
- if (check_for_equality(ublas_A, vcl_A))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "Inplace add to matrix: ";
- ublas_B += ublas_A_sub2;
- vcl_B += vcl_A_sub2;
-
- if (check_for_equality(ublas_B, vcl_B))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "Add to submatrix: ";
- ublas_A_sub2 = ublas_A_sub2 + ublas_A_sub2;
- vcl_A_sub2 = vcl_A_sub2 + vcl_A_sub2;
-
- if (check_for_equality(ublas_A, vcl_A))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "Add to matrix: ";
- ublas_B = ublas_A_sub2 + ublas_A_sub2;
- vcl_B = vcl_A_sub2 + vcl_A_sub2;
-
- if (check_for_equality(ublas_B, vcl_B))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "//" << std::endl;
- std::cout << "////////// Test 4: Subtraction //////////" << std::endl;
- std::cout << "//" << std::endl;
- viennacl::copy(ublas_A_sub2, vcl_A_sub2);
-
- std::cout << "Inplace add to submatrix: ";
- ublas_A_sub2 -= ublas_A_sub2;
- vcl_A_sub2 -= vcl_A_sub2;
-
- if (check_for_equality(ublas_A, vcl_A))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "Inplace add to matrix: ";
- ublas_B -= ublas_A_sub2;
- vcl_B -= vcl_A_sub2;
-
- if (check_for_equality(ublas_B, vcl_B))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "Add to submatrix: ";
- ublas_A_sub2 = ublas_A_sub2 - ublas_A_sub2;
- vcl_A_sub2 = vcl_A_sub2 - vcl_A_sub2;
-
- if (check_for_equality(ublas_A, vcl_A))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "Add to matrix: ";
- ublas_B = ublas_A_sub2 - ublas_A_sub2;
- vcl_B = vcl_A_sub2 - vcl_A_sub2;
-
- if (check_for_equality(ublas_B, vcl_B))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "//" << std::endl;
- std::cout << "////////// Test 5: Scaling //////////" << std::endl;
- std::cout << "//" << std::endl;
- viennacl::copy(ublas_A, vcl_A);
-
- std::cout << "Multiplication with CPU scalar: ";
- ublas_A_sub2 *= ScalarType(3.1415);
- vcl_A_sub2 *= ScalarType(3.1415);
-
- if (check_for_equality(ublas_A, vcl_A))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "Multiplication with GPU scalar: ";
- ublas_A_sub2 *= gpu_pi;
- vcl_A_sub2 *= gpu_pi;
-
- if (check_for_equality(ublas_A, vcl_A))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
-
- std::cout << "Division with CPU scalar: ";
- ublas_A_sub2 /= ScalarType(3.1415);
- vcl_A_sub2 /= ScalarType(3.1415);
-
- if (check_for_equality(ublas_A, vcl_A))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "Division with GPU scalar: ";
- ublas_A_sub2 /= gpu_pi;
- vcl_A_sub2 /= gpu_pi;
-
- if (check_for_equality(ublas_A, vcl_A))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
-
-
- std::cout << "//" << std::endl;
- std::cout << "////////// Test 6: Matrix-Matrix Products //////////" << std::endl;
- std::cout << "//" << std::endl;
-
- std::cout << "Assigned C = A * B: ";
- ublas_A_sub1 = prod(ublas_C_sub, ublas_D_sub);
- vcl_A_sub1 = viennacl::linalg::prod(vcl_C_sub, vcl_D_sub);
-
- if (check_for_equality(ublas_A, vcl_A))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "Assigned C = A^T * B: ";
- ublas_A_sub1 = prod(trans(ublas_C_sub), ublas_D_sub);
- vcl_A_sub1 = viennacl::linalg::prod(trans(vcl_C_sub), vcl_D_sub);
-
- if (check_for_equality(ublas_A, vcl_A))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "Assigned C = A * B^T: ";
- ublas_A_sub1 = prod(ublas_C_sub, trans(ublas_D_sub));
- vcl_A_sub1 = viennacl::linalg::prod(vcl_C_sub, trans(vcl_D_sub));
-
- if (check_for_equality(ublas_A, vcl_A))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "Assigned C = A^T * B^T: ";
- ublas_A_sub1 = prod(trans(ublas_C_sub), trans(ublas_D_sub));
- vcl_A_sub1 = viennacl::linalg::prod(trans(vcl_C_sub), trans(vcl_D_sub));
-
- if (check_for_equality(ublas_A, vcl_A))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "Inplace add of prod(): ";
- ublas_A_sub1 += prod(ublas_C_sub, ublas_D_sub);
- vcl_A_sub1 += viennacl::linalg::prod(vcl_C_sub, vcl_D_sub);
-
- if (check_for_equality(ublas_A, vcl_A))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
-
- std::cout << "//" << std::endl;
- std::cout << "////////// Test 7: Matrix-Vector Products //////////" << std::endl;
- std::cout << "//" << std::endl;
-
- VectorType ublas_v1(dim_large);
- for (std::size_t i=0; i<ublas_v1.size(); ++i)
- ublas_v1(i) = i;
- boost::numeric::ublas::vector_range<VectorType> ublas_v1_sub(ublas_v1, ublas_r1);
-
- VectorType ublas_v2(dim_large);
- for (std::size_t i=0; i<ublas_v2.size(); ++i)
- ublas_v2(i) = i - 5;
- boost::numeric::ublas::vector_range<VectorType> ublas_v2_sub(ublas_v2, ublas_r1);
-
-
- VCLVectorType vcl_v1(ublas_v1.size());
- viennacl::vector_range<VCLVectorType> vcl_v1_sub(vcl_v1, vcl_r1);
- VCLVectorType vcl_v2(ublas_v2.size());
- viennacl::vector_range<VCLVectorType> vcl_v2_sub(vcl_v2, vcl_r1);
- viennacl::copy(ublas_v1, vcl_v1);
- viennacl::copy(ublas_v2, vcl_v2);
- viennacl::copy(ublas_A_sub1, vcl_A_sub1);
-
-
- ublas_v2_sub = prod(ublas_A_sub1, ublas_v1_sub);
- vcl_v2_sub = viennacl::linalg::prod(vcl_A_sub1, vcl_v1_sub);
-
- if (check_for_equality_vector(ublas_v2, vcl_v2))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
-
- return EXIT_SUCCESS;
-}
-
-int main (int argc, const char * argv[])
-{
- std::cout << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << "## Test :: Matrix Range" << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << std::endl;
-
- std::cout << "# Testing setup:" << std::endl;
- std::cout << " eps: " << 0 << std::endl;
- std::cout << " numeric: float" << std::endl;
- if (run_test<viennacl::row_major, float>() != EXIT_SUCCESS)
- return EXIT_FAILURE;
- if (run_test<viennacl::column_major, float>() != EXIT_SUCCESS)
- return EXIT_FAILURE;
-
-
- if( viennacl::ocl::current_device().double_support() )
- {
- std::cout << "# Testing setup:" << std::endl;
- std::cout << " eps: " << 0 << std::endl;
- std::cout << " numeric: double" << std::endl;
-
- if (run_test<viennacl::row_major, double>() != EXIT_SUCCESS)
- return EXIT_FAILURE;
- if (run_test<viennacl::column_major, double>() != EXIT_SUCCESS)
- return EXIT_FAILURE;
- }
-
- return EXIT_SUCCESS;
-}
-
diff --git a/tests/src/matrix_slice.cpp b/tests/src/matrix_slice.cpp
deleted file mode 100644
index acdc09b..0000000
--- a/tests/src/matrix_slice.cpp
+++ /dev/null
@@ -1,563 +0,0 @@
-/* =========================================================================
- Copyright (c) 2010-2011, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp at iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-#define VIENNACL_HAVE_UBLAS
-//#define NDEBUG
-//#define VIENNACL_BUILD_INFO
-
-#include <utility>
-#include <iostream>
-#include <fstream>
-#include <string>
-#include <cmath>
-#include <algorithm>
-#include <stdio.h>
-#include <time.h>
-//#include "../benchmarks/benchmark-utils.hpp"
-#include "viennacl/scalar.hpp"
-#include "viennacl/matrix.hpp"
-#include "viennacl/linalg/prod.hpp"
-/*#include "viennacl/compressed_matrix.hpp"
-#include "viennacl/linalg/cg.hpp"
-#include "viennacl/linalg/inner_prod.hpp"
-#include "viennacl/linalg/ilu.hpp"
-#include "viennacl/linalg/norm_2.hpp"
-#include "viennacl/io/matrix_market.hpp"*/
-#include "viennacl/matrix_proxy.hpp"
-#include "viennacl/vector_proxy.hpp"
-#include "boost/numeric/ublas/vector.hpp"
-#include "boost/numeric/ublas/matrix.hpp"
-#include "boost/numeric/ublas/matrix_proxy.hpp"
-#include "boost/numeric/ublas/vector_proxy.hpp"
-#include "boost/numeric/ublas/io.hpp"
-
-
-template <typename VectorType, typename VCLVectorType>
-bool check_for_equality_vector(VectorType const & ublas_v, VCLVectorType const & vcl_v)
-{
- typedef typename VectorType::value_type value_type;
-
- boost::numeric::ublas::vector<value_type> vcl_v_cpu(vcl_v.size());
- viennacl::copy(vcl_v, vcl_v_cpu);
-
- for (std::size_t i=0; i<ublas_v.size(); ++i)
- {
- if (ublas_v(i) != vcl_v_cpu(i))
- {
- if ( std::abs(ublas_v(i) - vcl_v_cpu(i)) / std::max(ublas_v(i), vcl_v_cpu(i)) > 1e-5 )
- {
- std::cout << "Error at index (" << i << "): " << ublas_v(i) << " vs " << vcl_v_cpu(i) << std::endl;
- std::cout << ublas_v << std::endl;
- std::cout << vcl_v_cpu << std::endl;
- return false;
- }
- }
- }
- return true;
-}
-
-
-template <typename MatrixType, typename VCLMatrixType>
-bool check_for_equality(MatrixType const & ublas_A, VCLMatrixType const & vcl_A)
-{
- typedef typename MatrixType::value_type value_type;
-
- boost::numeric::ublas::matrix<value_type> vcl_A_cpu(vcl_A.size1(), vcl_A.size2());
- viennacl::copy(vcl_A, vcl_A_cpu);
-
- for (std::size_t i=0; i<ublas_A.size1(); ++i)
- {
- for (std::size_t j=0; j<ublas_A.size2(); ++j)
- {
- if (ublas_A(i,j) != vcl_A_cpu(i,j))
- {
- if ( std::abs(ublas_A(i,j) - vcl_A_cpu(i,j)) / std::max(ublas_A(i,j), vcl_A_cpu(i,j)) > 1e-5 )
- {
- std::cout << "Error at index (" << i << ", " << j << "): " << ublas_A(i,j) << " vs " << vcl_A_cpu(i,j) << std::endl;
- std::cout << ublas_A << std::endl;
- std::cout << vcl_A_cpu << std::endl;
- return false;
- }
- }
- }
- }
- return true;
-}
-
-
-
-template <typename T, typename ScalarType>
-int run_test()
-{
- //typedef float ScalarType;
- typedef boost::numeric::ublas::matrix<ScalarType> MatrixType;
- typedef boost::numeric::ublas::vector<ScalarType> VectorType;
-
- typedef viennacl::matrix<ScalarType, T> VCLMatrixType;
- typedef viennacl::vector<ScalarType> VCLVectorType;
-
- viennacl::scalar<ScalarType> gpu_pi = ScalarType(3.1415);
-
- //std::size_t dim_large = 196;
- //std::size_t dim_small = 64;
- //std::size_t dim_large = 75; //Note: ensure dim_large > 2 * dim_small
- //std::size_t dim_small = 34;
-
- std::size_t dim_large = 75; //Note: ensure dim_large > 2 * dim_small
- std::size_t dim_small = 34;
-
- //setup ublas objects:
- MatrixType ublas_A(dim_large, dim_large);
- for (std::size_t i=0; i<ublas_A.size1(); ++i)
- for (std::size_t j=0; j<ublas_A.size2(); ++j)
- ublas_A(i,j) = ScalarType((i+1) + (j+1)*(i+1));
-
- MatrixType ublas_B(dim_small, dim_small);
- for (std::size_t i=0; i<ublas_B.size1(); ++i)
- for (std::size_t j=0; j<ublas_B.size2(); ++j)
- ublas_B(i,j) = ScalarType((i+1) + (j+1)*(i+1));
-
- MatrixType ublas_C(dim_large, 2 * dim_small);
- for (std::size_t i=0; i<ublas_C.size1(); ++i)
- for (std::size_t j=0; j<ublas_C.size2(); ++j)
- ublas_C(i,j) = ScalarType((j+2) + (j+1)*(i+1));
-
- MatrixType ublas_D(2 * dim_small, dim_large);
- for (std::size_t i=0; i<ublas_D.size1(); ++i)
- for (std::size_t j=0; j<ublas_D.size2(); ++j)
- ublas_D(i,j) = ScalarType((j+2) + (j+1)*(i+1));
-
- boost::numeric::ublas::slice ublas_s1(0, 2, dim_small);
- boost::numeric::ublas::slice ublas_s2(dim_large - 2 * dim_small, 2, dim_small);
- boost::numeric::ublas::matrix_slice<MatrixType> ublas_A_sub1(ublas_A, ublas_s1, ublas_s1);
- boost::numeric::ublas::matrix_slice<MatrixType> ublas_A_sub2(ublas_A, ublas_s2, ublas_s2);
-
- boost::numeric::ublas::matrix_slice<MatrixType> ublas_C_sub(ublas_C, ublas_s1, ublas_s1);
- boost::numeric::ublas::matrix_slice<MatrixType> ublas_D_sub(ublas_D, ublas_s1, ublas_s1);
-
- //Setup ViennaCL objects
- VCLMatrixType vcl_A(dim_large, dim_large);
- viennacl::copy(ublas_A, vcl_A);
- VCLMatrixType vcl_B(dim_small, dim_small);
- viennacl::copy(ublas_B, vcl_B);
- VCLMatrixType vcl_C(dim_large, 2 * dim_small);
- viennacl::copy(ublas_C, vcl_C);
- VCLMatrixType vcl_D(2 * dim_small, dim_large);
- viennacl::copy(ublas_D, vcl_D);
-
- viennacl::slice vcl_s1(0, 2, dim_small);
- viennacl::slice vcl_s2(dim_large - 2 * dim_small, 2, dim_small);
- viennacl::matrix_slice<VCLMatrixType> vcl_A_sub1(vcl_A, vcl_s1, vcl_s1);
- viennacl::matrix_slice<VCLMatrixType> vcl_A_sub2(vcl_A, vcl_s2, vcl_s2);
-
- viennacl::matrix_slice<VCLMatrixType> vcl_C_sub(vcl_C, vcl_s1, vcl_s1);
- viennacl::matrix_slice<VCLMatrixType> vcl_D_sub(vcl_D, vcl_s1, vcl_s1);
-
- std::cout << std::endl;
- std::cout << "//" << std::endl;
- std::cout << "////////// Test 1: Copy to GPU //////////" << std::endl;
- std::cout << "//" << std::endl;
-
- std::cout << "Testing upper left copy to A... ";
- ublas_A_sub1 = ublas_B;
- viennacl::copy(ublas_B, vcl_A_sub1);
- if (check_for_equality(ublas_A, vcl_A))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
-
- std::cout << "Testing lower right copy to A... ";
- ublas_A_sub2 = ublas_B;
- viennacl::copy(ublas_B, vcl_A_sub2);
- if (check_for_equality(ublas_A, vcl_A))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
-
-
- std::cout << "Testing upper copy to C... ";
- ublas_C_sub = ublas_B;
- viennacl::copy(ublas_B, vcl_C_sub);
- if (check_for_equality(ublas_C, vcl_C))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
-
- std::cout << "Testing left copy to D... ";
- ublas_D_sub = ublas_B;
- viennacl::copy(ublas_B, vcl_D_sub);
- if (check_for_equality(ublas_D, vcl_D))
- std::cout << "PASSED!" << std::endl;
- else
- std::cout << std::endl << "TEST failed!";
-
- std::cout << std::endl;
- std::cout << "//" << std::endl;
- std::cout << "////////// Test 2: Copy from GPU //////////" << std::endl;
- std::cout << "//" << std::endl;
-
- std::cout << "Testing upper left copy to A... ";
- if (check_for_equality(ublas_A_sub1, vcl_A_sub1))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "Testing lower right copy to A... ";
- if (check_for_equality(ublas_A_sub2, vcl_A_sub2))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "Testing upper copy to C... ";
- if (check_for_equality(ublas_C_sub, vcl_C_sub))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "Testing left copy to D... ";
- if (check_for_equality(ublas_D_sub, vcl_D_sub))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
-
- std::cout << "//" << std::endl;
- std::cout << "////////// Test 3: Addition //////////" << std::endl;
- std::cout << "//" << std::endl;
- viennacl::copy(ublas_A_sub2, vcl_A_sub2);
-
- std::cout << "Inplace add to submatrix: ";
- ublas_A_sub2 += ublas_A_sub2;
- vcl_A_sub2 += vcl_A_sub2;
-
- if (check_for_equality(ublas_A, vcl_A))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "Inplace add to matrix: ";
- ublas_B += ublas_A_sub2;
- vcl_B += vcl_A_sub2;
-
- if (check_for_equality(ublas_B, vcl_B))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "Add to submatrix: ";
- ublas_A_sub2 = ublas_A_sub2 + ublas_A_sub2;
- vcl_A_sub2 = vcl_A_sub2 + vcl_A_sub2;
-
- if (check_for_equality(ublas_A, vcl_A))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "Add to matrix: ";
- ublas_B = ublas_A_sub2 + ublas_A_sub2;
- vcl_B = vcl_A_sub2 + vcl_A_sub2;
-
- if (check_for_equality(ublas_B, vcl_B))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "//" << std::endl;
- std::cout << "////////// Test 4: Subtraction //////////" << std::endl;
- std::cout << "//" << std::endl;
- viennacl::copy(ublas_A_sub2, vcl_A_sub2);
-
- std::cout << "Inplace add to submatrix: ";
- ublas_A_sub2 -= ublas_A_sub2;
- vcl_A_sub2 -= vcl_A_sub2;
-
- if (check_for_equality(ublas_A, vcl_A))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "Inplace add to matrix: ";
- ublas_B -= ublas_A_sub2;
- vcl_B -= vcl_A_sub2;
-
- if (check_for_equality(ublas_B, vcl_B))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "Add to submatrix: ";
- ublas_A_sub2 = ublas_A_sub2 - ublas_A_sub2;
- vcl_A_sub2 = vcl_A_sub2 - vcl_A_sub2;
-
- if (check_for_equality(ublas_A, vcl_A))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "Add to matrix: ";
- ublas_B = ublas_A_sub2 - ublas_A_sub2;
- vcl_B = vcl_A_sub2 - vcl_A_sub2;
-
- if (check_for_equality(ublas_B, vcl_B))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "//" << std::endl;
- std::cout << "////////// Test 5: Scaling //////////" << std::endl;
- std::cout << "//" << std::endl;
- viennacl::copy(ublas_A, vcl_A);
-
- std::cout << "Multiplication with CPU scalar: ";
- ublas_A_sub2 *= ScalarType(3.1415);
- vcl_A_sub2 *= ScalarType(3.1415);
-
- if (check_for_equality(ublas_A, vcl_A))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "Multiplication with GPU scalar: ";
- ublas_A_sub2 *= gpu_pi;
- vcl_A_sub2 *= gpu_pi;
-
- if (check_for_equality(ublas_A, vcl_A))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
-
- std::cout << "Division with CPU scalar: ";
- ublas_A_sub2 /= ScalarType(3.1415);
- vcl_A_sub2 /= ScalarType(3.1415);
-
- if (check_for_equality(ublas_A, vcl_A))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "Division with GPU scalar: ";
- ublas_A_sub2 /= gpu_pi;
- vcl_A_sub2 /= gpu_pi;
-
- if (check_for_equality(ublas_A, vcl_A))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
-
-
- std::cout << "//" << std::endl;
- std::cout << "////////// Test 6: Matrix-Matrix Products //////////" << std::endl;
- std::cout << "//" << std::endl;
- viennacl::copy(ublas_A, vcl_A);
- viennacl::copy(ublas_B, vcl_B);
- viennacl::copy(ublas_C, vcl_C);
-
- std::cout << "Assigned C = A * B: ";
- ublas_A_sub1 = prod(ublas_C_sub, ublas_D_sub);
- vcl_A_sub1 = viennacl::linalg::prod(vcl_C_sub, vcl_D_sub);
-
- if (check_for_equality(ublas_A, vcl_A))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "Assigned C = A^T * B: ";
- ublas_A_sub1 = prod(trans(ublas_C_sub), ublas_D_sub);
- vcl_A_sub1 = viennacl::linalg::prod(trans(vcl_C_sub), vcl_D_sub);
-
- if (check_for_equality(ublas_A, vcl_A))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "Assigned C = A * B^T: ";
- ublas_A_sub1 = prod(ublas_C_sub, trans(ublas_D_sub));
- vcl_A_sub1 = viennacl::linalg::prod(vcl_C_sub, trans(vcl_D_sub));
-
- if (check_for_equality(ublas_A, vcl_A))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "Assigned C = A^T * B^T: ";
- ublas_A_sub1 = prod(trans(ublas_C_sub), trans(ublas_D_sub));
- vcl_A_sub1 = viennacl::linalg::prod(trans(vcl_C_sub), trans(vcl_D_sub));
-
- if (check_for_equality(ublas_A, vcl_A))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- std::cout << "Inplace add of prod(): ";
- ublas_A_sub1 += prod(ublas_C_sub, ublas_D_sub);
- vcl_A_sub1 += viennacl::linalg::prod(vcl_C_sub, vcl_D_sub);
-
- if (check_for_equality(ublas_A, vcl_A))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
-
- std::cout << "//" << std::endl;
- std::cout << "////////// Test 7: Matrix-Vector Products //////////" << std::endl;
- std::cout << "//" << std::endl;
-
- VectorType ublas_v1(dim_large);
- for (std::size_t i=0; i<ublas_v1.size(); ++i)
- ublas_v1(i) = static_cast<ScalarType>(i);
- boost::numeric::ublas::vector_slice<VectorType> ublas_v1_sub(ublas_v1, ublas_s1);
-
- VectorType ublas_v2(dim_large);
- for (std::size_t i=0; i<ublas_v2.size(); ++i)
- ublas_v2(i) = static_cast<ScalarType>(i) - static_cast<ScalarType>(5);
- boost::numeric::ublas::vector_slice<VectorType> ublas_v2_sub(ublas_v2, ublas_s1);
-
-
- VCLVectorType vcl_v1(ublas_v1.size());
- viennacl::vector_slice<VCLVectorType> vcl_v1_sub(vcl_v1, vcl_s1);
- VCLVectorType vcl_v2(ublas_v2.size());
- viennacl::vector_slice<VCLVectorType> vcl_v2_sub(vcl_v2, vcl_s1);
- viennacl::copy(ublas_v1, vcl_v1);
- viennacl::copy(ublas_v2, vcl_v2);
- viennacl::copy(ublas_A_sub1, vcl_A_sub1);
-
-
- ublas_v2_sub = prod(ublas_A_sub1, ublas_v1_sub);
- vcl_v2_sub = viennacl::linalg::prod(vcl_A_sub1, vcl_v1_sub);
-
- if (check_for_equality_vector(ublas_v2, vcl_v2))
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
-
- return EXIT_SUCCESS;
-}
-
-int main (int argc, const char * argv[])
-{
- std::cout << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << "## Test :: Matrix Slice" << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << std::endl;
-
- std::cout << "# Testing setup:" << std::endl;
- std::cout << " eps: " << 0 << std::endl;
- std::cout << " numeric: float" << std::endl;
- if (run_test<viennacl::row_major, float>() != EXIT_SUCCESS)
- return EXIT_FAILURE;
- if (run_test<viennacl::column_major, float>() != EXIT_SUCCESS)
- return EXIT_FAILURE;
-
-
- if( viennacl::ocl::current_device().double_support() )
- {
- std::cout << "# Testing setup:" << std::endl;
- std::cout << " eps: " << 0 << std::endl;
- std::cout << " numeric: double" << std::endl;
-
- if (run_test<viennacl::row_major, double>() != EXIT_SUCCESS)
- return EXIT_FAILURE;
- if (run_test<viennacl::column_major, double>() != EXIT_SUCCESS)
- return EXIT_FAILURE;
- }
-
- return EXIT_SUCCESS;
-}
-
diff --git a/tests/src/vector_range.cpp b/tests/src/vector_range.cpp
deleted file mode 100644
index efdd2ed..0000000
--- a/tests/src/vector_range.cpp
+++ /dev/null
@@ -1,396 +0,0 @@
-/* =========================================================================
- Copyright (c) 2010-2011, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp at iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-#define VIENNACL_HAVE_UBLAS
-//#define NDEBUG
-//#define VIENNACL_BUILD_INFO
-
-#include <utility>
-#include <iostream>
-#include <fstream>
-#include <string>
-#include <cmath>
-#include <algorithm>
-#include <stdio.h>
-#include <time.h>
-//#include "../benchmarks/benchmark-utils.hpp"
-#include "viennacl/scalar.hpp"
-#include "viennacl/matrix.hpp"
-#include "viennacl/linalg/prod.hpp"
-#include "viennacl/linalg/norm_1.hpp"
-#include "viennacl/linalg/norm_2.hpp"
-#include "viennacl/linalg/norm_inf.hpp"
-#include "viennacl/linalg/inner_prod.hpp"
-/*#include "viennacl/compressed_matrix.hpp"
-#include "viennacl/linalg/cg.hpp"
-#include "viennacl/linalg/inner_prod.hpp"
-#include "viennacl/linalg/ilu.hpp"
-#include "viennacl/linalg/norm_2.hpp"
-#include "viennacl/io/matrix_market.hpp"*/
-#include "viennacl/vector_proxy.hpp"
-#include "boost/numeric/ublas/vector.hpp"
-#include "boost/numeric/ublas/matrix.hpp"
-#include "boost/numeric/ublas/vector_proxy.hpp"
-#include "boost/numeric/ublas/io.hpp"
-
-
-template <typename VectorType, typename VCLVectorType>
-bool check_for_equality(VectorType const & ublas_v, VCLVectorType const & vcl_v)
-{
- typedef typename VectorType::value_type value_type;
-
- std::vector<value_type> vcl_v_cpu(vcl_v.size());
- viennacl::copy(vcl_v, vcl_v_cpu);
-
- bool error_detected = false;
- for (size_t i=0; i<ublas_v.size(); ++i)
- {
- if (ublas_v[i] != vcl_v_cpu[i])
- {
- //check whether there are just some round-off errors:
- if (std::abs(ublas_v[i] - vcl_v_cpu[i]) / std::max(ublas_v[i], vcl_v_cpu[i]) > 1e-5)
- {
- std::cout << "Error at index (" << i << "): " << ublas_v[i] << " vs " << vcl_v_cpu[i] << std::endl;
- error_detected = true;
- }
- }
- }
-
- if (!error_detected)
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- return true;
-}
-
-
-
-template <typename ScalarType>
-int run_test()
-{
- typedef boost::numeric::ublas::vector<ScalarType> VectorType;
-
- typedef viennacl::vector<ScalarType> VCLVectorType;
-
- std::size_t dim_large = 70;
- std::size_t dim_small = 27;
-
- //setup ublas objects:
- VectorType ublas_v1(dim_large);
- for (std::size_t i=0; i<ublas_v1.size(); ++i)
- ublas_v1(i) = static_cast<ScalarType>(i+1);
-
- VectorType ublas_v2(dim_small);
- for (std::size_t i=0; i<ublas_v2.size(); ++i)
- ublas_v2(i) = static_cast<ScalarType>(dim_large + i);
-
- boost::numeric::ublas::range ublas_r1(0, dim_small);
- boost::numeric::ublas::range ublas_r2(dim_small - 1, 2*dim_small - 1);
- boost::numeric::ublas::range ublas_r3(dim_large - dim_small, dim_large);
- boost::numeric::ublas::vector_range<VectorType> ublas_v1_sub1(ublas_v1, ublas_r1);
- boost::numeric::ublas::vector_range<VectorType> ublas_v1_sub2(ublas_v1, ublas_r2);
- boost::numeric::ublas::vector_range<VectorType> ublas_v1_sub3(ublas_v1, ublas_r3);
-
- //Setup ViennaCL objects
- VCLVectorType vcl_v1(dim_large);
- viennacl::copy(ublas_v1, vcl_v1);
- VCLVectorType vcl_v2(dim_small);
- viennacl::copy(ublas_v2, vcl_v2);
-
- viennacl::range vcl_r1(0, dim_small);
- viennacl::range vcl_r2(dim_small - 1, 2*dim_small - 1);
- viennacl::range vcl_r3(dim_large - dim_small, dim_large);
- viennacl::vector_range<VCLVectorType> vcl_v1_sub1(vcl_v1, vcl_r1);
- viennacl::vector_range<VCLVectorType> vcl_v1_sub2(vcl_v1, vcl_r2);
- viennacl::vector_range<VCLVectorType> vcl_v1_sub3(vcl_v1, vcl_r3);
-
- std::cout << std::endl;
- std::cout << "//" << std::endl;
- std::cout << "////////// Test: Copy to GPU //////////" << std::endl;
- std::cout << "//" << std::endl;
-
- ublas_v1_sub1 = ublas_v2;
- viennacl::copy(ublas_v2, vcl_v1_sub1);
- std::cout << "Testing copy to begin of v1... ";
- check_for_equality(ublas_v1, vcl_v1);
-
-
- ublas_v1_sub2 = ublas_v2;
- viennacl::copy(ublas_v2, vcl_v1_sub2);
- std::cout << "Testing copy to middle of v1... ";
- check_for_equality(ublas_v1, vcl_v1);
-
-
-
- ublas_v1_sub3 = ublas_v2;
- viennacl::copy(ublas_v2, vcl_v1_sub3);
- std::cout << "Testing copy to bottom of v1... ";
- check_for_equality(ublas_v1, vcl_v1);
-
-
- std::cout << std::endl;
- std::cout << "//" << std::endl;
- std::cout << "////////// Test: Copy from GPU //////////" << std::endl;
- std::cout << "//" << std::endl;
-
- std::cout << "Testing beginning of v1... ";
- check_for_equality(ublas_v1_sub1, vcl_v1_sub1);
-
- std::cout << "Testing middle of v1... ";
- check_for_equality(ublas_v1_sub2, vcl_v1_sub2);
-
- std::cout << "Testing bottom of v1... ";
- check_for_equality(ublas_v1_sub3, vcl_v1_sub3);
-
-
-
- std::cout << std::endl;
- std::cout << "//" << std::endl;
- std::cout << "////////// Test: Assignments //////////" << std::endl;
- std::cout << "//" << std::endl;
-
- viennacl::copy(ublas_v1, vcl_v1);
- viennacl::copy(ublas_v2, vcl_v2);
-
- std::cout << "Testing vector assigned to range... ";
- ublas_v1_sub1 = ublas_v2;
- vcl_v1_sub1 = vcl_v2;
- check_for_equality(ublas_v1, vcl_v1);
-
- std::cout << "Testing range assigned to vector... ";
- ublas_v2 = ublas_v1_sub1;
- vcl_v2 = vcl_v1_sub1;
- check_for_equality(ublas_v1, vcl_v1);
-
- std::cout << "Testing range assigned to range... ";
- ublas_v1_sub1 = ublas_v1_sub3;
- vcl_v1_sub1 = vcl_v1_sub3;
- check_for_equality(ublas_v1, vcl_v1);
-
-
-
-
- std::cout << "//" << std::endl;
- std::cout << "////////// Test: Inplace add //////////" << std::endl;
- std::cout << "//" << std::endl;
- viennacl::copy(ublas_v1_sub1, vcl_v1_sub1);
-
- std::cout << "Testing inplace add at beginning of v1: ";
- ublas_v1_sub1 += ublas_v1_sub1;
- vcl_v1_sub1 += vcl_v1_sub1;
-
- check_for_equality(ublas_v1, vcl_v1);
-
- std::cout << "Testing inplace add at middle of v1: ";
- ublas_v1_sub2 += ublas_v1_sub2;
- vcl_v1_sub2 += vcl_v1_sub2;
- check_for_equality(ublas_v1, vcl_v1);
-
-
- std::cout << "Testing inplace add at end of v1: ";
- ublas_v1_sub3 += ublas_v1_sub3;
- vcl_v1_sub3 += vcl_v1_sub3;
- check_for_equality(ublas_v1, vcl_v1);
-
-
- std::cout << "Testing inplace add at end of v1: ";
- ublas_v1_sub3 += ublas_v1_sub3;
- vcl_v1_sub3 += vcl_v1_sub3;
- check_for_equality(ublas_v1, vcl_v1);
-
- std::cout << "Testing inplace add of vector with range: ";
- viennacl::copy(ublas_v2, vcl_v2);
- ublas_v1_sub2 += ublas_v2;
- vcl_v1_sub2 += vcl_v2;
-
- check_for_equality(ublas_v1, vcl_v1);
-
-
- std::cout << "//" << std::endl;
- std::cout << "////////// Test: Inplace sub //////////" << std::endl;
- std::cout << "//" << std::endl;
- viennacl::copy(ublas_v1_sub1, vcl_v1_sub1);
-
- std::cout << "Testing inplace sub at beginning of v1: ";
- ublas_v1_sub1 -= ublas_v1_sub1;
- vcl_v1_sub1 -= vcl_v1_sub1;
-
- check_for_equality(ublas_v1, vcl_v1);
-
-
- std::cout << "Testing inplace sub at middle of v1: ";
- ublas_v1_sub2 -= ublas_v1_sub2;
- vcl_v1_sub2 -= vcl_v1_sub2;
-
- check_for_equality(ublas_v1, vcl_v1);
-
-
- std::cout << "Testing inplace sub at end of v1: ";
- ublas_v1_sub3 -= ublas_v1_sub3;
- vcl_v1_sub3 -= vcl_v1_sub3;
-
- check_for_equality(ublas_v1, vcl_v1);
-
- std::cout << "Testing inplace sub of vector with range: ";
- viennacl::copy(ublas_v2, vcl_v2);
- ublas_v1_sub2 -= ublas_v2;
- vcl_v1_sub2 -= vcl_v2;
-
- check_for_equality(ublas_v1, vcl_v1);
-
-
- std::cout << "//" << std::endl;
- std::cout << "////////// Test: Inplace mult/div //////////" << std::endl;
- std::cout << "//" << std::endl;
- viennacl::copy(ublas_v1, vcl_v1);
- viennacl::copy(ublas_v2, vcl_v2);
- ScalarType s = 3.14;
- viennacl::scalar<ScalarType> vcl_s = s;
-
- std::cout << "Multiplication with CPU scalar: ";
- ublas_v1_sub1 *= s;
- vcl_v1_sub1 *= s;
-
- check_for_equality(ublas_v1, vcl_v1);
-
- std::cout << "Multiplication with GPU scalar: ";
- ublas_v1_sub3 *= vcl_s;
- vcl_v1_sub3 *= vcl_s;
-
- check_for_equality(ublas_v1, vcl_v1);
-
-
- std::cout << "Division with CPU scalar: ";
- ublas_v1_sub1 /= s;
- vcl_v1_sub1 /= s;
-
- check_for_equality(ublas_v1, vcl_v1);
-
- std::cout << "Division with GPU scalar: ";
- ublas_v1_sub3 /= vcl_s;
- vcl_v1_sub3 /= vcl_s;
-
- check_for_equality(ublas_v1, vcl_v1);
-
-
-
-
- std::cout << "//" << std::endl;
- std::cout << "////////// Test: Vector Operations (norm_X, inner_prod, etc.) //////////" << std::endl;
- std::cout << "//" << std::endl;
-
- for (std::size_t i=0; i<ublas_v1.size(); ++i) //reinit values
- ublas_v1(i) = static_cast<ScalarType>(i+1);
-
- viennacl::copy(ublas_v1_sub1, vcl_v1_sub1);
- viennacl::copy(ublas_v1_sub2, vcl_v1_sub2);
- viennacl::copy(ublas_v1_sub3, vcl_v1_sub3);
-
- double result_ublas = 0;
- double result_viennacl = 0;
-
- std::cout << "Testing norm_1: ";
- result_ublas = norm_1(ublas_v1_sub2);
- result_viennacl = viennacl::linalg::norm_1(vcl_v1_sub2);
-
- if (std::abs(result_ublas - result_viennacl) / std::abs(result_ublas) < 1e-3)
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- std::cout << "Ublas: " << result_ublas << std::endl;
- std::cout << "ViennaCL: " << result_viennacl << std::endl;
- return EXIT_FAILURE;
- }
-
- std::cout << "Testing norm_2: ";
- result_ublas = norm_2(ublas_v1_sub2);
- result_viennacl = viennacl::linalg::norm_2(vcl_v1_sub2);
-
- if (std::abs(result_ublas - result_viennacl) / std::abs(result_ublas) < 1e-3)
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- std::cout << "Ublas: " << result_ublas << std::endl;
- std::cout << "ViennaCL: " << result_viennacl << std::endl;
- return EXIT_FAILURE;
- }
-
- std::cout << "Testing norm_inf: ";
- result_ublas = norm_inf(ublas_v1_sub2);
- result_viennacl = viennacl::linalg::norm_inf(vcl_v1_sub2);
-
- if (std::abs(result_ublas - result_viennacl) / std::abs(result_ublas) < 1e-3)
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- std::cout << "Ublas: " << result_ublas << std::endl;
- std::cout << "ViennaCL: " << result_viennacl << std::endl;
- return EXIT_FAILURE;
- }
-
- std::cout << "Testing inner_prod: ";
- result_ublas = inner_prod(ublas_v1_sub1, ublas_v1_sub3);
- result_viennacl = viennacl::linalg::inner_prod(vcl_v1_sub1, vcl_v1_sub3);
-
- if (std::abs(result_ublas - result_viennacl) / std::abs(result_ublas) < 1e-3)
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- std::cout << "Ublas: " << result_ublas << std::endl;
- std::cout << "ViennaCL: " << result_viennacl << std::endl;
- return EXIT_FAILURE;
- }
-
- return EXIT_SUCCESS;
-}
-
-int main (int argc, const char * argv[])
-{
- std::cout << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << "## Test :: Vector Range" << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << std::endl;
-
- std::cout << "# Testing setup:" << std::endl;
- //std::cout << " eps: " << 0 << std::endl;
- std::cout << " numeric: float" << std::endl;
- if (run_test<float>() != EXIT_SUCCESS)
- return EXIT_FAILURE;
-
- if( viennacl::ocl::current_device().double_support() )
- {
- std::cout << "# Testing setup:" << std::endl;
- //std::cout << " eps: " << 0 << std::endl;
- std::cout << " numeric: double" << std::endl;
-
- if (run_test<double>() != EXIT_SUCCESS)
- return EXIT_FAILURE;
- }
-
- return EXIT_SUCCESS;
-}
-
diff --git a/tests/src/vector_slice.cpp b/tests/src/vector_slice.cpp
deleted file mode 100644
index 34f3c10..0000000
--- a/tests/src/vector_slice.cpp
+++ /dev/null
@@ -1,396 +0,0 @@
-/* =========================================================================
- Copyright (c) 2010-2011, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp at iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-#define VIENNACL_HAVE_UBLAS
-//#define NDEBUG
-//#define VIENNACL_BUILD_INFO
-
-#include <utility>
-#include <iostream>
-#include <fstream>
-#include <string>
-#include <cmath>
-#include <algorithm>
-#include <stdio.h>
-#include <time.h>
-//#include "../benchmarks/benchmark-utils.hpp"
-#include "viennacl/scalar.hpp"
-#include "viennacl/matrix.hpp"
-#include "viennacl/linalg/prod.hpp"
-#include "viennacl/linalg/norm_1.hpp"
-#include "viennacl/linalg/norm_2.hpp"
-#include "viennacl/linalg/norm_inf.hpp"
-#include "viennacl/linalg/inner_prod.hpp"
-/*#include "viennacl/compressed_matrix.hpp"
-#include "viennacl/linalg/cg.hpp"
-#include "viennacl/linalg/inner_prod.hpp"
-#include "viennacl/linalg/ilu.hpp"
-#include "viennacl/linalg/norm_2.hpp"
-#include "viennacl/io/matrix_market.hpp"*/
-#include "viennacl/vector_proxy.hpp"
-#include "boost/numeric/ublas/vector.hpp"
-#include "boost/numeric/ublas/matrix.hpp"
-#include "boost/numeric/ublas/vector_proxy.hpp"
-#include "boost/numeric/ublas/io.hpp"
-
-
-template <typename VectorType, typename VCLVectorType>
-bool check_for_equality(VectorType const & ublas_v, VCLVectorType const & vcl_v)
-{
- typedef typename VectorType::value_type value_type;
-
- std::vector<value_type> vcl_v_cpu(vcl_v.size());
- viennacl::copy(vcl_v, vcl_v_cpu);
-
- bool error_detected = false;
- for (size_t i=0; i<ublas_v.size(); ++i)
- {
- if (ublas_v[i] != vcl_v_cpu[i])
- {
- //check whether there are just some round-off errors:
- if (std::abs(ublas_v[i] - vcl_v_cpu[i]) / std::max(ublas_v[i], vcl_v_cpu[i]) > 1e-5)
- {
- std::cout << "Error at index (" << i << "): " << ublas_v[i] << " vs " << vcl_v_cpu[i] << std::endl;
- error_detected = true;
- }
- }
- }
-
- if (!error_detected)
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- return EXIT_FAILURE;
- }
-
- return true;
-}
-
-
-
-template <typename ScalarType>
-int run_test()
-{
- typedef boost::numeric::ublas::vector<ScalarType> VectorType;
-
- typedef viennacl::vector<ScalarType> VCLVectorType;
-
- std::size_t dim_large = 90;
- std::size_t dim_small = 27;
-
- //setup ublas objects:
- VectorType ublas_v1(dim_large);
- for (std::size_t i=0; i<ublas_v1.size(); ++i)
- ublas_v1(i) = static_cast<ScalarType>(i+1);
-
- VectorType ublas_v2(dim_small);
- for (std::size_t i=0; i<ublas_v2.size(); ++i)
- ublas_v2(i) = static_cast<ScalarType>(dim_large + i);
-
- boost::numeric::ublas::slice ublas_s1(0, 2, dim_small);
- boost::numeric::ublas::slice ublas_s2(dim_small - 1, 2, dim_small);
- boost::numeric::ublas::slice ublas_s3(dim_large - 3 * dim_small, 3, dim_small);
- boost::numeric::ublas::vector_slice<VectorType> ublas_v1_sub1(ublas_v1, ublas_s1);
- boost::numeric::ublas::vector_slice<VectorType> ublas_v1_sub2(ublas_v1, ublas_s2);
- boost::numeric::ublas::vector_slice<VectorType> ublas_v1_sub3(ublas_v1, ublas_s3);
-
- //Setup ViennaCL objects
- VCLVectorType vcl_v1(dim_large);
- viennacl::copy(ublas_v1, vcl_v1);
- VCLVectorType vcl_v2(dim_small);
- viennacl::copy(ublas_v2, vcl_v2);
-
- viennacl::slice vcl_s1(0, 2, dim_small);
- viennacl::slice vcl_s2(dim_small - 1, 2, dim_small);
- viennacl::slice vcl_s3(dim_large - 3 * dim_small, 3, dim_small);
- viennacl::vector_slice<VCLVectorType> vcl_v1_sub1(vcl_v1, vcl_s1);
- viennacl::vector_slice<VCLVectorType> vcl_v1_sub2(vcl_v1, vcl_s2);
- viennacl::vector_slice<VCLVectorType> vcl_v1_sub3(vcl_v1, vcl_s3);
-
- std::cout << std::endl;
- std::cout << "//" << std::endl;
- std::cout << "////////// Test: Copy to GPU //////////" << std::endl;
- std::cout << "//" << std::endl;
-
- std::cout << "Testing copy to begin of v1... ";
- ublas_v1_sub1 = ublas_v2;
- viennacl::copy(ublas_v2, vcl_v1_sub1);
- check_for_equality(ublas_v1, vcl_v1);
-
-
- std::cout << "Testing copy to middle of v1... ";
- ublas_v1_sub2 = ublas_v2;
- viennacl::copy(ublas_v2, vcl_v1_sub2);
- check_for_equality(ublas_v1, vcl_v1);
-
-
-
- std::cout << "Testing copy to bottom of v1... ";
- ublas_v1_sub3 = ublas_v2;
- viennacl::copy(ublas_v2, vcl_v1_sub3);
- check_for_equality(ublas_v1, vcl_v1);
-
-
- std::cout << std::endl;
- std::cout << "//" << std::endl;
- std::cout << "////////// Test: Copy from GPU //////////" << std::endl;
- std::cout << "//" << std::endl;
-
- std::cout << "Testing beginning of v1... ";
- check_for_equality(ublas_v1_sub1, vcl_v1_sub1);
-
- std::cout << "Testing middle of v1... ";
- check_for_equality(ublas_v1_sub2, vcl_v1_sub2);
-
- std::cout << "Testing bottom of v1... ";
- check_for_equality(ublas_v1_sub3, vcl_v1_sub3);
-
-
-
- std::cout << std::endl;
- std::cout << "//" << std::endl;
- std::cout << "////////// Test: Assignments //////////" << std::endl;
- std::cout << "//" << std::endl;
-
- viennacl::copy(ublas_v1, vcl_v1);
- viennacl::copy(ublas_v2, vcl_v2);
-
- std::cout << "Testing vector assigned to slice... ";
- ublas_v1_sub1 = ublas_v2;
- vcl_v1_sub1 = vcl_v2;
- check_for_equality(ublas_v1, vcl_v1);
-
- std::cout << "Testing slice assigned to vector... ";
- ublas_v2 = ublas_v1_sub1;
- vcl_v2 = vcl_v1_sub1;
- check_for_equality(ublas_v1, vcl_v1);
-
- std::cout << "Testing slice assigned to slice... ";
- ublas_v1_sub1 = ublas_v1_sub3;
- vcl_v1_sub1 = vcl_v1_sub3;
- check_for_equality(ublas_v1, vcl_v1);
-
-
-
-
- std::cout << "//" << std::endl;
- std::cout << "////////// Test: Inplace add //////////" << std::endl;
- std::cout << "//" << std::endl;
- viennacl::copy(ublas_v1_sub1, vcl_v1_sub1);
-
- std::cout << "Testing inplace add at beginning of v1: ";
- ublas_v1_sub1 += ublas_v1_sub1;
- vcl_v1_sub1 += vcl_v1_sub1;
-
- check_for_equality(ublas_v1, vcl_v1);
-
- std::cout << "Testing inplace add at middle of v1: ";
- ublas_v1_sub2 += ublas_v1_sub2;
- vcl_v1_sub2 += vcl_v1_sub2;
- check_for_equality(ublas_v1, vcl_v1);
-
-
- std::cout << "Testing inplace add at end of v1: ";
- ublas_v1_sub3 += ublas_v1_sub3;
- vcl_v1_sub3 += vcl_v1_sub3;
- check_for_equality(ublas_v1, vcl_v1);
-
-
- std::cout << "Testing inplace add at end of v1: ";
- ublas_v1_sub3 += ublas_v1_sub3;
- vcl_v1_sub3 += vcl_v1_sub3;
- check_for_equality(ublas_v1, vcl_v1);
-
- std::cout << "Testing inplace add of vector with slice: ";
- viennacl::copy(ublas_v2, vcl_v2);
- ublas_v1_sub2 += ublas_v2;
- vcl_v1_sub2 += vcl_v2;
-
- check_for_equality(ublas_v1, vcl_v1);
-
-
- std::cout << "//" << std::endl;
- std::cout << "////////// Test: Inplace sub //////////" << std::endl;
- std::cout << "//" << std::endl;
- viennacl::copy(ublas_v1_sub1, vcl_v1_sub1);
-
- std::cout << "Testing inplace sub at beginning of v1: ";
- ublas_v1_sub1 -= ublas_v1_sub1;
- vcl_v1_sub1 -= vcl_v1_sub1;
-
- check_for_equality(ublas_v1, vcl_v1);
-
-
- std::cout << "Testing inplace sub at middle of v1: ";
- ublas_v1_sub2 -= ublas_v1_sub2;
- vcl_v1_sub2 -= vcl_v1_sub2;
-
- check_for_equality(ublas_v1, vcl_v1);
-
-
- std::cout << "Testing inplace sub at end of v1: ";
- ublas_v1_sub3 -= ublas_v1_sub3;
- vcl_v1_sub3 -= vcl_v1_sub3;
-
- check_for_equality(ublas_v1, vcl_v1);
-
- std::cout << "Testing inplace sub of vector with slice: ";
- viennacl::copy(ublas_v2, vcl_v2);
- ublas_v1_sub2 -= ublas_v2;
- vcl_v1_sub2 -= vcl_v2;
-
- check_for_equality(ublas_v1, vcl_v1);
-
-
- std::cout << "//" << std::endl;
- std::cout << "////////// Test: Inplace mult/div //////////" << std::endl;
- std::cout << "//" << std::endl;
- viennacl::copy(ublas_v1, vcl_v1);
- viennacl::copy(ublas_v2, vcl_v2);
- ScalarType s = static_cast<ScalarType>(3.14);
- viennacl::scalar<ScalarType> vcl_s = s;
-
- std::cout << "Multiplication with CPU scalar: ";
- ublas_v1_sub1 *= s;
- vcl_v1_sub1 *= s;
-
- check_for_equality(ublas_v1, vcl_v1);
-
- std::cout << "Multiplication with GPU scalar: ";
- ublas_v1_sub3 *= vcl_s;
- vcl_v1_sub3 *= vcl_s;
-
- check_for_equality(ublas_v1, vcl_v1);
-
-
- std::cout << "Division with CPU scalar: ";
- ublas_v1_sub1 /= s;
- vcl_v1_sub1 /= s;
-
- check_for_equality(ublas_v1, vcl_v1);
-
- std::cout << "Division with GPU scalar: ";
- ublas_v1_sub3 /= vcl_s;
- vcl_v1_sub3 /= vcl_s;
-
- check_for_equality(ublas_v1, vcl_v1);
-
-
-
-
- std::cout << "//" << std::endl;
- std::cout << "////////// Test: Vector Operations (norm_X, inner_prod, etc.) //////////" << std::endl;
- std::cout << "//" << std::endl;
-
- for (std::size_t i=0; i<ublas_v1.size(); ++i) //reinit values
- ublas_v1(i) = static_cast<ScalarType>(i+1);
-
- viennacl::copy(ublas_v1_sub1, vcl_v1_sub1);
- viennacl::copy(ublas_v1_sub2, vcl_v1_sub2);
- viennacl::copy(ublas_v1_sub3, vcl_v1_sub3);
-
- double result_ublas = 0;
- double result_viennacl = 0;
-
- std::cout << "Testing norm_1: ";
- result_ublas = norm_1(ublas_v1_sub2);
- result_viennacl = viennacl::linalg::norm_1(vcl_v1_sub2);
-
- if (std::abs(result_ublas - result_viennacl) / std::abs(result_ublas) < 1e-3)
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- std::cout << "Ublas: " << result_ublas << std::endl;
- std::cout << "ViennaCL: " << result_viennacl << std::endl;
- return EXIT_FAILURE;
- }
-
- std::cout << "Testing norm_2: ";
- result_ublas = norm_2(ublas_v1_sub2);
- result_viennacl = viennacl::linalg::norm_2(vcl_v1_sub2);
-
- if (std::abs(result_ublas - result_viennacl) / std::abs(result_ublas) < 1e-3)
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- std::cout << "Ublas: " << result_ublas << std::endl;
- std::cout << "ViennaCL: " << result_viennacl << std::endl;
- return EXIT_FAILURE;
- }
-
- std::cout << "Testing norm_inf: ";
- result_ublas = norm_inf(ublas_v1_sub2);
- result_viennacl = viennacl::linalg::norm_inf(vcl_v1_sub2);
-
- if (std::abs(result_ublas - result_viennacl) / std::abs(result_ublas) < 1e-3)
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- std::cout << "Ublas: " << result_ublas << std::endl;
- std::cout << "ViennaCL: " << result_viennacl << std::endl;
- return EXIT_FAILURE;
- }
-
- std::cout << "Testing inner_prod: ";
- result_ublas = inner_prod(ublas_v1_sub1, ublas_v1_sub3);
- result_viennacl = viennacl::linalg::inner_prod(vcl_v1_sub1, vcl_v1_sub3);
-
- if (std::abs(result_ublas - result_viennacl) / std::abs(result_ublas) < 1e-3)
- std::cout << "PASSED!" << std::endl;
- else
- {
- std::cout << std::endl << "TEST failed!";
- std::cout << "Ublas: " << result_ublas << std::endl;
- std::cout << "ViennaCL: " << result_viennacl << std::endl;
- return EXIT_FAILURE;
- }
-
- return EXIT_SUCCESS;
-}
-
-int main (int argc, const char * argv[])
-{
- std::cout << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << "## Test :: Vector Slice" << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << "----------------------------------------------" << std::endl;
- std::cout << std::endl;
-
- std::cout << "# Testing setup:" << std::endl;
- //std::cout << " eps: " << 0 << std::endl;
- std::cout << " numeric: float" << std::endl;
- if (run_test<float>() != EXIT_SUCCESS)
- return EXIT_FAILURE;
-
- if( viennacl::ocl::current_device().double_support() )
- {
- std::cout << "# Testing setup:" << std::endl;
- //std::cout << " eps: " << 0 << std::endl;
- std::cout << " numeric: double" << std::endl;
-
- if (run_test<double>() != EXIT_SUCCESS)
- return EXIT_FAILURE;
- }
-
- return EXIT_SUCCESS;
-}
-
diff --git a/viennacl/generator/compound_node.hpp b/viennacl/generator/compound_node.hpp
deleted file mode 100644
index d6b7f0d..0000000
--- a/viennacl/generator/compound_node.hpp
+++ /dev/null
@@ -1,199 +0,0 @@
-#ifndef VIENNACL_GENERATOR_COMPOUND_NODE_HPP
-#define VIENNACL_GENERATOR_COMPOUND_NODE_HPP
-
-/* =========================================================================
- Copyright (c) 2010-2012, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp at iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file compound_node.hpp
- * @brief Structures corresponding to binary nodes in the expression tree
- *
- * Generator code contributed by Philippe Tillet
- */
-
-#include <string>
-#include <sstream>
-#include <set>
-
-#include "viennacl/generator/forwards.h"
-#include "viennacl/generator/meta_tools/utils.hpp"
-#include "viennacl/generator/traits/general_purpose_traits.hpp"
-#include "viennacl/generator/traits/result_of.hpp"
-
-namespace viennacl
-{
- namespace generator
- {
-
- /**
- * @brief Binary node class for storing expression trees
- *
- * @tparam LHS_ LHS of the expression
- * @tparam OP_ Operator of the expression
- * @tparam RHS_ RHS of the expression
- * @tparam is_temporary_ Boolean for storing whether the binary node is temporary.
- */
- template<class LHS_, class OP_, class RHS_, bool is_temporary_>
- class compound_node
- {
- public:
- typedef LHS_ LHS;
- typedef RHS_ RHS;
- typedef OP_ OP;
-
- static const bool is_temporary = is_temporary_;
-
- static const std::string name()
- {
- return LHS::name() + "_" + OP::name() + "_" + RHS::name();
- }
- };
-
- template<class LHS_, class RHS_, bool is_temporary_>
- class compound_node<LHS_,inner_prod_type,RHS_, is_temporary_>
- {
- public:
- /**
- * @brief Specialization for the inner product
- */
- typedef LHS_ LHS;
- typedef RHS_ RHS;
- typedef inner_prod_type OP;
- typedef typename result_of::expression_type<RHS>::Result IntermediateType; //Note: Visual Studio does not allow to combine this line with the next one directly.
- typedef typename IntermediateType::ScalarType ScalarType;
-
- static const bool is_temporary = is_temporary_;
-
- enum { id = -2 };
-
- static const std::string kernel_arguments()
- {
- return "__global float * " + name() + '\n';
- }
-
- static const std::string name()
- {
- return LHS::name() + "_inprod_" + RHS::name();
- }
-
- static const std::string scalar_name()
- {
- return name() +"_s";
- };
-
- };
-
- /**
- * @brief Specialization for the matrix-vector product.
- */
- template<class LHS_, class RHS_, bool is_temporary_>
- class compound_node<LHS_,prod_type,RHS_, is_temporary_>
- {
- private:
- typedef compound_node<LHS_,prod_type,RHS_, is_temporary_> self_type;
-
- public:
- typedef LHS_ LHS;
- typedef RHS_ RHS;
-
- typedef prod_type OP;
- enum { id = LHS::id };
-
- typedef typename result_of::expression_type<RHS>::Result IntermediateType; //Note: Visual Studio does not allow to combine this line with the next one directly.
- typedef typename IntermediateType::ScalarType ScalarType;
- static const unsigned int Alignment = result_of::expression_type<RHS>::Result::Alignment;
- static const bool is_temporary = is_temporary_;
-
- static const std::string name()
- {
- return LHS::name() + "_prod_" + RHS::name();
- }
-
- static const std::string size2_name()
- {
- return "size_"+name();
- }
-
- static const std::string internal_size2_name()
- {
- return "internal_size_"+name();
- }
-
- static const std::string name_argument()
- {
- return " __global " + print_type<ScalarType*,Alignment>::value() + " " + name();
- }
-
- static const std::string kernel_arguments()
- {
- return name_argument() + ", unsigned int " + size2_name() + ", unsigned int " + internal_size2_name() + "\n" ;
- }
- };
-
-
- /** @brief Addition operator on 2 elements of the same type */
- template<class LHS_TYPE, class RHS_TYPE>
- typename enable_if< is_same_expression_type<LHS_TYPE, RHS_TYPE>,
- compound_node<LHS_TYPE, add_type, RHS_TYPE> >::type
- operator+ ( LHS_TYPE const & lhs, RHS_TYPE const & rhs )
- {
- return compound_node<LHS_TYPE, add_type, RHS_TYPE>();
- }
-
- /** @brief Substraction operator on 2 elements of the same type */
- template<class LHS_TYPE, class RHS_TYPE>
- typename enable_if< is_same_expression_type<LHS_TYPE, RHS_TYPE>,
- compound_node<LHS_TYPE, sub_type, RHS_TYPE> >::type
- operator- ( LHS_TYPE const & lhs, RHS_TYPE const & rhs )
- {
- return compound_node<LHS_TYPE, sub_type, RHS_TYPE>();
- }
-
- /** @brief Helper for the inner_prod operator */
- template<class LHS, class RHS>
- struct make_inner_prod;
-
- template<class LHS, class LHS_SIZE_DESCRIPTOR,
- class RHS, class RHS_SIZE_DESCRIPTOR>
- struct make_inner_prod<result_of::vector_expression<LHS, LHS_SIZE_DESCRIPTOR>,
- result_of::vector_expression<RHS, RHS_SIZE_DESCRIPTOR> >
- {
- typedef compound_node<LHS,inner_prod_type,RHS,true> Result;
- };
-
-
- /** @brief Inner product operator */
- template<class LHS, class RHS>
- compound_node<LHS,inner_prod_type,RHS,true> inner_prod ( LHS vec_expr1,RHS vec_expr2 )
- {
- typedef typename result_of::expression_type<LHS>::Result LHS_TYPE;
- typedef typename result_of::expression_type<RHS>::Result RHS_TYPE;
- typename make_inner_prod<LHS_TYPE,RHS_TYPE>::Result result;
-
- return result;;
- }
-
- /** @brief Product operator */
- template<class LHS, class RHS>
- compound_node<LHS,prod_type,RHS> prod ( LHS vec_expr1,RHS vec_expr2 )
- {
- return compound_node<LHS,prod_type,RHS>();
- }
-
- } // namespace generator
-} // namespace viennacl
-
-#endif
-
diff --git a/viennacl/generator/custom_operation.hpp b/viennacl/generator/custom_operation.hpp
deleted file mode 100644
index a83cf9b..0000000
--- a/viennacl/generator/custom_operation.hpp
+++ /dev/null
@@ -1,268 +0,0 @@
-#ifndef VIENNACL_GENERATOR_CUSTOM_OPERATION_HPP
-#define VIENNACL_GENERATOR_CUSTOM_OPERATION_HPP
-
-/* =========================================================================
- Copyright (c) 2010-2012, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp at iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file custom_operation.hpp
- * @brief User Interface for making custom operations.
- *
- * Generator code contributed by Philippe Tillet
- */
-
-
-#include <vector>
-#include <set>
-#include <algorithm>
-
-#include "viennacl/generator/get_kernels_infos.hpp"
-#include "viennacl/ocl/kernel.hpp"
-#include "viennacl/generator/traits/result_of.hpp"
-#include "viennacl/generator/meta_tools/utils.hpp"
-
-
-namespace viennacl
-{
- namespace generator
- {
-
- /** @brief A class for making a custom operation */
- class custom_operation
- {
-
- public :
-
- /** @brief CTor
- *
- * @param expression the expression to build the interface for
- * @param program_name_hint the code for this expression will be stored in the program provided by this name
- */
- template<class T>
- custom_operation ( T const & expression, std::string const & program_name_hint="" )
- {
- program_name_ = viennacl::generator::program_infos<T>::value (program_name_hint, sources_,runtime_wrappers_);
- create_program ( static_cast<bool> ( viennacl::generator::tree_utils::count_if<T,viennacl::generator::is_inner_product_leaf>::value ) );
- }
-
- /** @brief DTor */
- ~custom_operation()
- {
- for (viennacl::generator::runtime_wrappers_t::iterator it = runtime_wrappers_.begin();
- it != runtime_wrappers_.end();
- ++it)
- {
- delete (it->second.second);
- }
- }
-
- /** @brief Returns the list of the kernels involved in the operation */
- viennacl::generator::KernelsSources const & kernels_sources() const
- {
- return sources_;
- }
-
- /** @brief Return the generated sources */
- std::string kernels_source_code() const
- {
- std::string res;
- for (viennacl::generator::KernelsSources::const_iterator it = sources_.begin();
- it != sources_.end();
- ++it)
- {
- res += it->second + "\n";
- }
-
- return res;
- }
-
- /** @brief Returns the program name */
- std::string const & program_name() const { return program_name_; }
-
-
- /** @brief Convenience for enqueuing the custom operation */
- template<class T0>
- custom_operation & operator() ( T0 const & t0)
- {
- user_args_.insert( std::make_pair(0, viennacl::any((T0*)&t0)) );
- add_operation_arguments();
- return *this;
- }
-
- /** @brief Convenience for enqueuing the custom operation */
- template<class T0, class T1>
- custom_operation & operator() ( T0 const & t0, T1 const & t1 )
- {
- user_args_.insert( std::make_pair(0, viennacl::any((T0*)&t0)) );
- user_args_.insert( std::make_pair(1, viennacl::any((T1*)&t1)) );
- add_operation_arguments();
- return *this;
- }
-
- /** @brief Convenience for enqueuing the custom operation */
- template<class T0, class T1, class T2>
- custom_operation & operator() ( T0 const & t0, T1 const & t1, T2 const & t2 )
- {
- user_args_.insert( std::make_pair(0, viennacl::any((T0*)&t0)) );
- user_args_.insert( std::make_pair(1, viennacl::any((T1*)&t1)) );
- user_args_.insert( std::make_pair(2, viennacl::any((T2*)&t2)) );
- add_operation_arguments();
- return *this;
- }
-
- /** @brief Convenience for enqueuing the custom operation */
- template<class T0, class T1, class T2, class T3>
- custom_operation & operator() ( T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3 )
- {
- user_args_.insert( std::make_pair(0, viennacl::any((T0*)&t0)) );
- user_args_.insert( std::make_pair(1, viennacl::any((T1*)&t1)) );
- user_args_.insert( std::make_pair(2, viennacl::any((T2*)&t2)) );
- user_args_.insert( std::make_pair(3, viennacl::any((T3*)&t3)) );
- add_operation_arguments();
- return *this;
- }
-
- /** @brief Convenience for enqueuing the custom operation */
- template<class T0, class T1, class T2, class T3, class T4>
- custom_operation & operator() ( T0 & t0, T1 & t1, T2 & t2, T3 & t3, T4 & t4 )
- {
- user_args_.insert( std::make_pair(0, viennacl::any((T0*)&t0)) );
- user_args_.insert( std::make_pair(1, viennacl::any((T1*)&t1)) );
- user_args_.insert( std::make_pair(2, viennacl::any((T2*)&t2)) );
- user_args_.insert( std::make_pair(3, viennacl::any((T3*)&t3)) );
- user_args_.insert( std::make_pair(4, viennacl::any((T4*)&t4)) );
- add_operation_arguments();
- return *this;
- }
-
- /** @brief Convenience for enqueuing the custom operation */
- template<class T0, class T1, class T2, class T3, class T4, class T5>
- custom_operation & operator() ( T0 & t0, T1 & t1, T2 & t2, T3 & t3, T4 & t4, T5 & t5 )
- {
- user_args_.insert( std::make_pair(0, viennacl::any((T0*)&t0)) );
- user_args_.insert( std::make_pair(1, viennacl::any((T1*)&t1)) );
- user_args_.insert( std::make_pair(2, viennacl::any((T2*)&t2)) );
- user_args_.insert( std::make_pair(3, viennacl::any((T3*)&t3)) );
- user_args_.insert( std::make_pair(4, viennacl::any((T4*)&t4)) );
- user_args_.insert( std::make_pair(5, viennacl::any((T5*)&t5)) );
- add_operation_arguments();
- return *this;
- }
-
- /** @brief Convenience for enqueuing the custom operation */
- template<class T0, class T1, class T2, class T3, class T4, class T5, class T6>
- custom_operation & operator() ( T0 & t0, T1 & t1, T2 & t2, T3 & t3, T4 & t4, T5 & t5, T6 & t6)
- {
- user_args_.insert( std::make_pair(0, viennacl::any((T0*)&t0)) );
- user_args_.insert( std::make_pair(1, viennacl::any((T1*)&t1)) );
- user_args_.insert( std::make_pair(2, viennacl::any((T2*)&t2)) );
- user_args_.insert( std::make_pair(3, viennacl::any((T3*)&t3)) );
- user_args_.insert( std::make_pair(4, viennacl::any((T4*)&t4)) );
- user_args_.insert( std::make_pair(5, viennacl::any((T5*)&t5)) );
- user_args_.insert( std::make_pair(6, viennacl::any((T6*)&t6)) );
- add_operation_arguments();
- return *this;
- }
-
- /** @brief Convenience for enqueuing the custom operation */
- template <class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7>
- custom_operation & operator() ( T0 & t0, T1 & t1, T2 & t2, T3 & t3, T4 & t4, T5 & t5, T6 & t6, T7 & t7 )
- {
- user_args_.insert( std::make_pair(0, viennacl::any((T0*)&t0)) );
- user_args_.insert( std::make_pair(1, viennacl::any((T1*)&t1)) );
- user_args_.insert( std::make_pair(2, viennacl::any((T2*)&t2)) );
- user_args_.insert( std::make_pair(3, viennacl::any((T3*)&t3)) );
- user_args_.insert( std::make_pair(4, viennacl::any((T4*)&t4)) );
- user_args_.insert( std::make_pair(5, viennacl::any((T5*)&t5)) );
- user_args_.insert( std::make_pair(6, viennacl::any((T6*)&t6)) );
- user_args_.insert( std::make_pair(7, viennacl::any((T7*)&t7)) );
- add_operation_arguments();
- return *this;
- }
-
- private:
-
- void create_program ( bool include_sum_kernel )
- {
- std::string kernels_string;
- for (viennacl::generator::KernelsSources::iterator it = sources_.begin();
- it != sources_.end();
- ++it )
- {
- kernels_string += it->second + "\n";
- }
-
- viennacl::ocl::program& program = viennacl::ocl::current_context().add_program(kernels_string, program_name_);
-
- for (viennacl::generator::KernelsSources::iterator it = sources_.begin();
- it != sources_.end();
- ++it)
- {
- program.add_kernel(it->first);
- }
- }
-
-
- void add_operation_arguments()
- {
- for (generator::runtime_wrappers_t::iterator it = runtime_wrappers_.begin();
- it != runtime_wrappers_.end();
- ++it)
- {
- std::string const & kernel_name = it->first;
- viennacl::ocl::kernel& current_kernel = viennacl::ocl::current_context().get_program(program_name_).get_kernel(kernel_name);
- const unsigned int arg_pos = it->second.first;
- generator::result_of::runtime_wrapper * current_arg = it->second.second;
- #ifdef VIENNACL_DEBUG_CUSTOM_OPERATION
- std::cout << "Enqueuing : Kernel " << kernel_name << " Argument : " << current_arg->name() << " | Pos : " << arg_pos << std::endl;
- #endif
- current_arg->enqueue(arg_pos,current_kernel,user_args_,temporaries_);
- }
- }
-
- private :
- typedef std::map<std::string, unsigned int> CurrentArgsContainer;
- CurrentArgsContainer current_args_pos_;
-
- std::map<unsigned int, viennacl::any> user_args_;
-
- std::string program_name_;
-
- std::vector<viennacl::ocl::local_mem> lmem_;
-
- viennacl::generator::KernelsSources sources_;
-
- viennacl::generator::runtime_wrappers_t runtime_wrappers_;
-
- std::map<std::string, viennacl::ocl::handle<cl_mem> > temporaries_;
- };
-
-
- inline void enqueue_custom_op(viennacl::generator::custom_operation & op, viennacl::ocl::command_queue const & queue)
- {
- for(std::map<std::string,std::string>::const_iterator it = op.kernels_sources().begin(); it != op.kernels_sources().end() ; ++it)
- {
- std::string current_kernel_name = it->first;
- #ifdef VIENNACL_DEBUG_CUSTOM_OPERATION
- std::cout << "Enqueueing " << current_kernel_name << std::endl;
- #endif
- enqueue(viennacl::ocl::current_context().get_program(op.program_name()).get_kernel(current_kernel_name));
- }
- }
-
- }
-}
-
-#endif
diff --git a/viennacl/generator/elementwise_modifier.hpp b/viennacl/generator/elementwise_modifier.hpp
deleted file mode 100644
index c9c72e2..0000000
--- a/viennacl/generator/elementwise_modifier.hpp
+++ /dev/null
@@ -1,93 +0,0 @@
-#ifndef VIENNACL_GENERATOR_ELEMENTWISE_MODIFIER_HPP
-#define VIENNACL_GENERATOR_ELEMENTWISE_MODIFIER_HPP
-
-/* =========================================================================
- Copyright (c) 2010-2012, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp at iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/generator/elementwise_modifier.hpp
- * @brief Contains the stuffs related to the elementwise_modifier
- *
- * Generator code contributed by Philippe Tillet
- */
-
-#include <typeinfo>
-#include <string>
-#include <algorithm>
-
-#include "viennacl/generator/forwards.h"
-
-namespace viennacl
-{
- namespace generator
- {
-
- /**
- * @brief Implementation of the elementwise_modifier
- *
- * @tparam T the underlying expression to modify
- * @tparam U the function returning the modifier's expression
- */
- template<class T, std::string (*U)()>
- struct elementwise_modifier_impl
- {
- private:
- static std::string expr_name()
- {
- std::string res = U();
- std::replace(res.begin(),res.end(),'/','o');
- std::replace(res.begin(),res.end(),'*','x');
- std::replace(res.begin(),res.end(),'+','a');
- std::replace(res.begin(),res.end(),'-','s');
- std::replace(res.begin(),res.end(),' ','_');
- std::replace(res.begin(),res.end(),'(','p');
- std::replace(res.begin(),res.end(),')','p');
- return res;
- }
-
- public:
- typedef T PRIOR_TYPE;
-
- enum { id = -2 };
-
- static std::string name()
- {
- return expr_name() + '_' + T::name();
- }
-
- static std::string modify(std::string const & replacer)
- {
- std::string result(U());
- int pos;
- while( (pos = result.find('X')) != std::string::npos )
- {
- result.replace(pos, 1, '(' + replacer + ')' );
- }
-
- return result;
- }
- };
-
- /** @brief Operator for creating an elementwise_modifier from an expression */
- template<std::string (*U)(),class T>
- elementwise_modifier_impl<T,U> elementwise_modifier( T const & t )
- {
- return elementwise_modifier_impl<T,U>();
- }
-
- }
-}
-
-#endif
diff --git a/viennacl/generator/get_kernels_infos.hpp b/viennacl/generator/get_kernels_infos.hpp
deleted file mode 100644
index 6e00bbd..0000000
--- a/viennacl/generator/get_kernels_infos.hpp
+++ /dev/null
@@ -1,579 +0,0 @@
-#ifndef VIENNACL_GENERATOR_CREATE_KERNEL_HPP
-#define VIENNACL_GENERATOR_CREATE_KERNEL_HPP
-
-/* =========================================================================
- Copyright (c) 2010-2012, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp at iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/generator/get_kernels_infos.hpp
- * @brief Provides information about kernels
- *
- * Generator code contributed by Philippe Tillet
- */
-
-// #include "kernel_utils.hpp"
-
-#include <map>
-
-#include "viennacl/generator/compound_node.hpp"
-#include "viennacl/generator/operation_types.hpp"
-#include "viennacl/generator/symbolic_types/symbolic_matrix.hpp"
-#include "viennacl/generator/symbolic_types/symbolic_vector.hpp"
-#include "viennacl/generator/symbolic_types/symbolic_scalars.hpp"
-#include "viennacl/generator/tree_operations.hpp"
-#include "viennacl/generator/tokens_management.hpp"
-#include "viennacl/generator/make_code/make_code.hpp"
-#include "viennacl/generator/meta_tools/typelist.hpp"
-#include "viennacl/generator/traits/general_purpose_traits.hpp"
-#include "viennacl/generator/traits/result_of.hpp"
-
-namespace viennacl
-{
- namespace generator
- {
-
- template <class T, bool is_first = true>
- struct arguments_list;
-
- template <bool is_first>
- struct arguments_list<NullType, is_first >
- {
- static const std::string string_value() { return ""; }
- };
-
-
- template <class Head, class Tail, bool is_first >
- struct arguments_list<typelist<Head,Tail>, is_first >
- {
- private:
- static const std::string add_comma ( Int2Type<false> ) { return ", "; }
- static const std::string add_comma ( Int2Type<true> ) { return ""; }
-
- public:
- static const std::string string_value()
- {
- return add_comma ( Int2Type<is_first>() )
- + Head::kernel_arguments()
- + arguments_list<Tail,false>::string_value();
- }
- };
-
- template<class T>
- struct requires_local_buffer
- {
- enum { value = is_inner_product_leaf<T>::value };
- };
-
- template<class T>
- struct requires_local_buffer<inner_prod_impl_t<T> >
- {
- enum { value = 1 };
- };
-
- template<class T>
- struct requires_local_buffer_list;
-
- template<class Head, class Tail>
- struct requires_local_buffer_list<typelist<Head, Tail> >
- {
- enum { value = static_cast<bool> ( tree_utils::count_if<Head, requires_local_buffer>::value
- || requires_local_buffer_list<Tail>::value )
- };
- };
-
- template<class Head>
- struct requires_local_buffer_list<typelist<Head, NullType> >
- {
- enum { value = static_cast<bool> ( tree_utils::count_if<Head, requires_local_buffer >::value ) };
- };
-
- template<class TLIST, class ASSIGNED>
- struct calculate_tokens
- {
- typedef typename TLIST::Head::first_type current_token;
- typedef typename TLIST::Head::second_type token_op;
-
- static const std::string value()
- {
- return make_code<current_token, token_op, ASSIGNED>::value()
- + calculate_tokens<typename TLIST::Tail, ASSIGNED>::value();
- }
- };
-
- template<class ASSIGNED>
- struct calculate_tokens<NullType, ASSIGNED>
- {
- static const std::string value() { return ""; }
- };
-
- template<class T>
- struct get_temporary_dependancies
- {
- typedef typename get_temporary_dependancies<typename result_of::expression_type<T>::Result>::Result Result;
- };
-
- template<class T, class SIZE_DESCRIPTOR>
- struct get_temporary_dependancies<result_of::vector_expression<T,SIZE_DESCRIPTOR> >
- {
- typedef SIZE_DESCRIPTOR Result;
- };
-
- template<class T>
- struct get_temporary_dependancies<result_of::scalar_expression<T> >
- {
- typedef NullType Result;
- };
-
- template<>
- struct get_temporary_dependancies<NullType>
- {
- typedef NullType Result;
- };
-
- template<class Head, class Tail>
- struct get_temporary_dependancies<typelist<Head,Tail> >
- {
- typedef typename typelist_utils::append<typename get_temporary_dependancies<Tail>::Result,
- typename get_temporary_dependancies<Head>::Result>::Result Result;
- };
-
-
- template<class T>
- struct get_kernel_arguments;
-
- template<class LHS, class OP, class RHS, bool _is_temporary>
- struct get_kernel_arguments<compound_node<LHS,OP,RHS,_is_temporary> >
- {
- typedef compound_node<LHS,OP,RHS,_is_temporary> Arg;
- typedef typename tree_utils::extract_if<Arg, is_regular_kernel_parameter, typelist_utils::compare1>::Result RegularLeafs;
- typedef typename tree_utils::extract_if<Arg, is_temporary_kernel_parameter>::Result TemporaryLeafs;
- typedef typename get_temporary_dependancies<TemporaryLeafs>::Result TemporaryDependancies;
- typedef typename typelist_utils::fuse<RegularLeafs, TemporaryLeafs, typelist_utils::compare1>::Result TmpResult0;
- typedef typename typelist_utils::fuse<TmpResult0, TemporaryDependancies, typelist_utils::compare1>::Result TmpResult1;
- typedef typename typelist_utils::no_duplicates<TmpResult1>::Result Result;
- };
-
- template<class Head, class Tail>
- struct get_kernel_arguments<typelist<Head,Tail> >
- {
- typedef typename typelist_utils::fuse<typename get_kernel_arguments<Head>::Result,
- typename get_kernel_arguments<Tail>::Result,
- typelist_utils::compare1>::Result TmpResult;
- typedef typename typelist_utils::no_duplicates<TmpResult>::Result Result;
- };
-
- template<>
- struct get_kernel_arguments<NullType>
- {
- typedef NullType Result;
- };
-
- template<class TreeList>
- struct kernel_header;
-
- template<class Head, class Tail>
- struct kernel_header<typelist<Head, Tail> >
- {
- private:
- typedef typelist<Head, Tail> Arg;
- typedef typename tree_utils::expand<Head>::Result ExpandedHead;
- typedef typename tree_utils::flip_tree<ExpandedHead>::Result NewHead;
- typedef typename get_kernel_arguments<Arg>::Result Arguments;
-
- static const std::string shared_memory ( Int2Type<false> ) { return ""; }
- static const std::string shared_memory ( Int2Type<true> ) { return ",__local float* shared_memory_ptr\n"; }
-
- public:
- static const std::string value ( std::string const & name )
- {
- return "__kernel void " + name + "(\n"
- + arguments_list<Arguments>::string_value()
- + shared_memory ( Int2Type<requires_local_buffer_list<Arg>::value>() )
- + ")\n";
- }
- };
-
-
- template<class TreeList, bool is_in_temporary_kernel, bool is_first = true>
- struct kernel_core;
-
- template<class TList>
- struct finalize_inner_products
- {
- static const std::string value() { return ""; }
- };
-
- template<class Head, class Tail>
- struct finalize_inner_products<typelist<Head,Tail> >
- {
- static const std::string value()
- {
- return make_code<Head,assign_type,Head>::value() + finalize_inner_products<Tail>::value();
- }
- };
-
- template<class Head, class Tail, bool is_in_temporary_kernel, bool is_first>
- struct kernel_core<typelist<Head, Tail>, is_in_temporary_kernel, is_first >
- {
- private:
- typedef typelist<Head, Tail> Arg;
- typedef typename tree_utils::expand<Head>::Result ExpandedHead;
- typedef typename tree_utils::flip_tree<ExpandedHead>::Result NewHead;
- typedef typename generate_tokens<NewHead, is_in_temporary_kernel>::Result Tokens;
- typedef typename tree_utils::extract_if<typename NewHead::RHS,is_inner_product_leaf>::Result InProdsT;
- typedef typename typelist_utils::no_duplicates<InProdsT>::Result InProds;
- typedef typename Head::LHS LHS;
-
- static const std::string additional_declarations ( Int2Type<true> ) {
- return "float sum;\n";
- }
-
- static const std::string additional_declarations ( Int2Type<false> ) { return "" ; }
-
-
- static const std::string head ( Int2Type<true> ) {
- return "{\n"
- + additional_declarations ( Int2Type<requires_local_buffer_list<Arg>::value>() );
- }
-
- static const std::string head ( Int2Type<false> ) { return "\n" ; }
-
- public:
- static const std::string value()
- {
- return head ( Int2Type<is_first>() )
- + finalize_inner_products<InProds>::value()
- + calculate_tokens<Tokens, LHS>::value()
- + kernel_core<Tail, is_in_temporary_kernel, false>::value();
- }
- };
-
- template<bool is_in_temporary_kernel>
- struct kernel_core<NullType, is_in_temporary_kernel, false>
- {
- static const std::string value() { return "}"; }
- };
-
- template<class T>
- struct remove_temporary
- {
- typedef T Result;
- };
-
- template<class Ref>
- struct remove_temporary<tmp_symbolic_vector<Ref> >
- {
- typedef Ref Result;
- };
-
- template<class LHS, class OP, class RHS>
- struct remove_temporary<compound_node<LHS,OP,RHS,true> >
- {
- typedef compound_node<LHS,OP,RHS> Result;
- };
-
- template<class TreeList, class Assigned>
- struct get_all_temporaries
- {
- private:
- typedef typename TreeList::Head Head;
- typedef typename TreeList::Tail Tail;
- typedef typename remove_temporary<Head>::Result NewHead;
- typedef typename tree_utils::register_temporaries<NewHead, true, Assigned>::Result Registered;
- typedef typename tree_utils::extract_if<Registered,is_temporary>::Result Temporaries;
- typedef typename get_all_temporaries<Tail, Assigned>::Result NewList;
-
- public:
- typedef typename typelist_utils::fuse<Temporaries, NewList>::Result Result;
- };
-
- template<class Assigned>
- struct get_all_temporaries<NullType, Assigned>
- {
- typedef NullType Result;
- };
-
- template<class T>
- struct Unroll
- {
- typedef NullType Result;
- };
-
- template<class Head, class Tail>
- struct Unroll<typelist<Head, Tail> >
- {
- typedef typename typelist_utils::fuse<Head,
- typename Unroll<Tail>::Result >::Result Result;
- };
-
- template<class HeadHead, class HeadTail, class Tail>
- struct Unroll<typelist<typelist<HeadHead, HeadTail>, Tail> >
- {
- typedef typename typelist_utils::fuse<typelist<HeadHead, HeadTail>,
- typename Unroll<Tail>::Result >::Result Result;
- };
-
- template<>
- struct Unroll<typelist<NullType, NullType> >
- {
- typedef NullType Result;
- };
-
- template<class T>
- struct find_prior_implementations
- {
- typedef T Result;
- };
-
- template<class LHS, class RHS>
- struct find_prior_implementations<compound_node<LHS, inner_prod_type, RHS,true> >
- {
- typedef inner_prod_impl_t<compound_node<LHS, inner_prod_type, RHS,true> > Result;
- };
-
- template<class Head, class Tail>
- struct find_prior_implementations<typelist<Head,Tail> >
- {
- private:
- typedef typename find_prior_implementations<Head>::Result NewHead;
- typedef typename find_prior_implementations<Tail>::Result NewTail;
-
- public:
- typedef typelist<NewHead,NewTail> Result;
- };
-
- template<class TreeList, class Assigned>
- struct register_kernels
- {
- private:
- typedef typename get_all_temporaries<TreeList, Assigned>::Result Temporaries;
- typedef typename register_kernels<Temporaries, Assigned>::Result CurrentList;
- typedef typename typelist_utils::erase<Temporaries,
- typename Unroll<CurrentList>::Result>::Result NextTemporaries;
- typedef typename find_prior_implementations<NextTemporaries>::Result NextList;
-
- public:
- typedef typename typelist_utils::append<CurrentList, NextList>::Result Result;
- };
-
- template<class Assigned>
- struct register_kernels<NullType, Assigned>
- {
- typedef typelist<NullType,NullType> Result;
- };
-
- template<class TreeList, bool is_in_temporary_kernel, class Enable = void>
- struct kernel_string
- {
- static const std::string value(std::string name)
- {
- return std::string ( kernel_header<TreeList>::value(name)
- + kernel_core<TreeList, is_in_temporary_kernel>::value() );
- }
- };
-
- template<class T>
- struct make_impl;
-
- template<class LHS_, class RHS_, bool is_temporary_>
- struct make_impl<compound_node< LHS_, inner_prod_type, RHS_, is_temporary_ > >
- {
- typedef inner_prod_impl_t<compound_node< LHS_, inner_prod_type, RHS_, is_temporary_ > > Result;
- };
-
- template<class Temporary>
- struct format_temporaries;
-
- template<class Head, class Tail>
- struct format_temporaries<typelist<Head,Tail> >
- {
- typedef compound_node<Head, assign_type, typename remove_temporary<Head>::Result> NewHead;
- typedef typename typelist_utils::append<typename format_temporaries<Tail>::Result, NewHead>::Result Result;
- };
-
- template<>
- struct format_temporaries<NullType>
- {
- typedef NullType Result;
- };
-
- typedef std::map<std::string,std::string> KernelsSources;
-
- template<class TemporaryKernelsList, class MainOperation, int Start, int End>
- struct fill_sources
- {
- typedef typename format_temporaries<typename typelist_utils::type_at< TemporaryKernelsList,Start>::Result >::Result CurrentList;
-
- static void execute(KernelsSources & sources, std::string const & operation_name)
- {
- std::string current_kernel_name("__" + operation_name + "_kernel"
- + to_string(typelist_utils::length<TemporaryKernelsList>::value - 1 - Start));
- sources.insert( std::make_pair( current_kernel_name,
- kernel_string< CurrentList, true>::value(current_kernel_name)
- )
- );
- fill_sources<TemporaryKernelsList, MainOperation, Start+1, End>::execute(sources, operation_name);
- }
- };
-
- template<class TemporaryKernelsList, class MainOperation, int End>
- struct fill_sources<TemporaryKernelsList, MainOperation, End, End>
- {
- static void execute(KernelsSources & sources, std::string const & operation_name)
- {
- sources.insert(std::make_pair(operation_name,
- kernel_string<MainOperation, false>::value(operation_name)
- )
- );
- }
- };
-
- typedef std::multimap<std::string, std::pair<unsigned int, result_of::runtime_wrapper*> > runtime_wrappers_t;
-
- template<class U>
- struct foreach_functor
- {
- static void execute(unsigned int & arg_pos, runtime_wrappers_t & runtime_wrappers, std::string const & name)
- {
- foreach_functor<typename result_of::expression_type<U>::Result >::execute(arg_pos, runtime_wrappers, name);
- }
- };
-
- template<>
- struct foreach_functor<NullType>;
-
- template<class T>
- struct foreach_functor<result_of::scalar_expression<T> >
- {
- static void execute(unsigned int & arg_pos, runtime_wrappers_t & runtime_wrappers, std::string const & name)
- {
- runtime_wrappers.insert(runtime_wrappers_t::value_type(name,
- std::make_pair(arg_pos,
- result_of::scalar_expression<T>::runtime_descriptor())
- )
- );
- arg_pos += 1;
- }
- };
-
- template<class T, class SIZE_DESCRIPTOR>
- struct foreach_functor<result_of::vector_expression<T,SIZE_DESCRIPTOR> >
- {
- static void execute(unsigned int & arg_pos, runtime_wrappers_t & runtime_wrappers, std::string const & name)
- {
- runtime_wrappers.insert(runtime_wrappers_t::value_type(name,
- std::make_pair(arg_pos,
- result_of::vector_expression<T,SIZE_DESCRIPTOR>::runtime_descriptor())
- )
- );
- arg_pos += 3;
- }
- };
-
- template<class T, class SIZE1_DESCRIPTOR, class SIZE2_DESCRIPTOR>
- struct foreach_functor<result_of::matrix_expression<T,SIZE1_DESCRIPTOR, SIZE2_DESCRIPTOR> >
- {
- static void execute(unsigned int & arg_pos, runtime_wrappers_t & runtime_wrappers, std::string const & name )
- {
- runtime_wrappers.insert(runtime_wrappers_t::value_type(name,
- std::make_pair(arg_pos,
- result_of::matrix_expression<T,SIZE1_DESCRIPTOR,SIZE2_DESCRIPTOR>::runtime_descriptor())
- )
- );
- arg_pos += 5;
- }
- };
-
- template<class TemporaryKernelsList, class MainOperation, int Start, int End>
- struct fill_args
- {
- typedef typename format_temporaries<typename typelist_utils::type_at<TemporaryKernelsList,Start>::Result >::Result CurrentList;
- typedef typename get_kernel_arguments<CurrentList>::Result Arguments;
-
- static void execute(runtime_wrappers_t & runtime_wrappers, std::string const & operation_name)
- {
- unsigned int arg_pos = 0;
- std::string current_kernel_name("__"+operation_name+"_kernel"
- + to_string(typelist_utils::length<TemporaryKernelsList>::value - 1 - Start));
-
- typelist_utils::ForEach<Arguments, foreach_functor>::execute(arg_pos,runtime_wrappers,current_kernel_name);
-
- if(requires_local_buffer_list<CurrentList>::value)
- {
- runtime_wrappers.insert(runtime_wrappers_t::value_type(current_kernel_name,
- std::make_pair(arg_pos,
- new result_of::shared_memory_wrapper())
- )
- );
- }
-
- fill_args<TemporaryKernelsList,MainOperation,Start+1,End>::execute(runtime_wrappers,operation_name);
- }
- };
-
- template<class TemporaryKernelsList, class MainOperation, int End>
- struct fill_args<TemporaryKernelsList, MainOperation, End, End>
- {
- private:
- typedef MainOperation CurrentList;
- typedef typename get_kernel_arguments<CurrentList>::Result Arguments;
-
- public:
- static void execute(runtime_wrappers_t & runtime_wrappers, std::string const & operation_name)
- {
- unsigned int arg_pos = 0;
- typelist_utils::ForEach<Arguments, foreach_functor>::execute(arg_pos, runtime_wrappers, operation_name);
- if(requires_local_buffer_list<CurrentList>::value)
- {
- runtime_wrappers.insert(runtime_wrappers_t::value_type(operation_name,
- std::make_pair(arg_pos,
- new result_of::shared_memory_wrapper())
- )
- );
- }
- }
- };
-
- template<class ARG>
- struct program_infos
- {
- typedef typename tree_utils::register_temporaries<ARG,false, typename ARG::LHS>::Result NewARG;
- typedef typelist<NewARG,NullType> MainOperation_Init;
- typedef typename register_kernels<MainOperation_Init, typename ARG::LHS>::Result KernelsList;
-
- static std::string value(std::string const & name_hint, KernelsSources & sources, runtime_wrappers_t & runtime_wrappers)
- {
- std::string operation_name = ARG::name();
- std::string program_name( (!name_hint.empty()) ? name_hint : operation_name );
-
- fill_sources<KernelsList,
- MainOperation_Init,
- 0,
- typelist_utils::length<KernelsList>::value - 1>::execute(sources,operation_name);
-
- fill_args<KernelsList,
- MainOperation_Init,
- 0,
- typelist_utils::length<KernelsList>::value - 1>::execute(runtime_wrappers,operation_name);
-
- return program_name;
- }
- };
-
-
-
- } // namespace generator
-} // namespace viennacl
-#endif
diff --git a/viennacl/generator/make_code/expression.hpp b/viennacl/generator/make_code/expression.hpp
deleted file mode 100644
index 5ae8dc9..0000000
--- a/viennacl/generator/make_code/expression.hpp
+++ /dev/null
@@ -1,163 +0,0 @@
-#ifndef VIENNACL_GENERATOR_MAKE_CODE_EXPRESSION_HPP
-#define VIENNACL_GENERATOR_MAKE_CODE_EXPRESSION_HPP
-
-/* =========================================================================
- Copyright (c) 2010-2012, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp at iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/generator/make_code/expression.hpp
- * @brief Directives for generating code for simple expressions.
- *
- * Generator code contributed by Philippe Tillet
- */
-
-
-#include "viennacl/generator/compound_node.hpp"
-#include "viennacl/generator/elementwise_modifier.hpp"
-#include "viennacl/generator/symbolic_types/symbolic_scalars.hpp"
-#include "viennacl/generator/meta_tools/utils.hpp"
-
-namespace viennacl
-{
- namespace generator
- {
-
- template <class T>
- struct make_expression_code
- {
- static const std::string value(std::string const & loop_accessor)
- {
- return T::name() + '[' + loop_accessor + ']';
- }
- };
-
- template <unsigned int ID, class SCALARTYPE>
- struct make_expression_code<cpu_symbolic_scalar<ID,SCALARTYPE> >
- {
- static const std::string value(std::string const & loop_accessor)
- {
- return cpu_symbolic_scalar<ID,SCALARTYPE>::name();
- }
- };
-
- template <unsigned int ID,class SCALARTYPE>
- struct make_expression_code<gpu_symbolic_scalar<ID,SCALARTYPE> >
- {
- static const std::string value(std::string const & loop_accessor)
- {
- return '*' + gpu_symbolic_scalar<ID,SCALARTYPE>::name();
- }
- };
-
- template <class LHS, class RHS, bool is_temporary >
- struct make_expression_code<compound_node<LHS,inner_prod_type,RHS, is_temporary> >
- {
- private:
- typedef compound_node<LHS,inner_prod_type,RHS, is_temporary> T;
-
- public:
- static const std::string value(std::string const & loop_accessor)
- {
- return T::name() +"_sum";
- }
- };
-
- template< >
- struct make_expression_code< NullType >
- {
- static const std::string value(std::string const & loop_accessor)
- {
- return "0";
- }
- };
-
- template<class T, std::string (*U)()>
- struct make_expression_code< elementwise_modifier_impl<T, U> >
- {
- typedef elementwise_modifier_impl<T, U> EW_M;
- static const std::string value ( std::string const & loop_accessor )
- {
- return EW_M::modify(make_expression_code<T>::value(loop_accessor));
- }
- };
-
- template<class LHS, class OP, class RHS >
- struct make_expression_code<compound_node<LHS, OP, RHS, false> >
- {
- static const std::string value(std::string const & loop_accessor = "k")
- {
- return make_expression_code<LHS>::value(loop_accessor)
- + OP::expression_string()
- + make_expression_code<RHS>::value(loop_accessor);
- }
- };
-
- template<class LHS, class RHS, unsigned int Alignment>
- struct dot_product_impl
- {
- static const std::string value(std::string lhs_loop_id,
- std::string rhs_loop_id)
- {
- return "dot(" + make_expression_code<LHS>::value(lhs_loop_id) + "," + make_expression_code<RHS>::value(rhs_loop_id) + ")";
- }
- };
-
- template<class LHS, class RHS>
- struct dot_product_impl<LHS, RHS, 8>
- {
- static const std::string value(std::string lhs_loop_id,
- std::string rhs_loop_id)
- {
- return "dot(" + make_expression_code<LHS>::value(lhs_loop_id) + ".s0123" + ","
- + make_expression_code<RHS>::value(rhs_loop_id) + ".s0123 )"
- + " + dot(" + make_expression_code<LHS>::value(lhs_loop_id) + ".s4567" + ","
- + make_expression_code<RHS>::value(rhs_loop_id) + ".s4567 );"
- ;
- }
- };
-
- template<class LHS, class RHS>
- struct dot_product_impl<LHS, RHS, 16>
- {
- static const std::string value(std::string lhs_loop_id,std::string rhs_loop_id)
- {
- return "dot(" + make_expression_code<LHS>::value(lhs_loop_id) + ".s0123" + ","
- + make_expression_code<RHS>::value(rhs_loop_id) + ".s0123)"
- +"\n + dot(" + make_expression_code<LHS>::value(lhs_loop_id) + ".s4567" + ","
- + make_expression_code<RHS>::value(rhs_loop_id) + ".s4567) "
- +"\n + dot(" + make_expression_code<LHS>::value(lhs_loop_id) + ".s89ab" + ","
- + make_expression_code<RHS>::value ( rhs_loop_id ) + ".s89ab) "
- +"\n + dot(" + make_expression_code<LHS>::value ( lhs_loop_id ) + ".scdef" + ","
- + make_expression_code<RHS>::value ( rhs_loop_id ) + ".scdef)"
- ;
- }
- };
-
- template<class LHS, class RHS>
- struct dot_product
- {
- static const std::string value(std::string lhs_loop_id,std::string rhs_loop_id)
- {
- return dot_product_impl<LHS,RHS,LHS::Alignment>::value(lhs_loop_id,rhs_loop_id);
- }
- };
-
- }
-
-}
-
-#endif
-
-
diff --git a/viennacl/generator/make_code/inner_product.hpp b/viennacl/generator/make_code/inner_product.hpp
deleted file mode 100644
index 101c1fb..0000000
--- a/viennacl/generator/make_code/inner_product.hpp
+++ /dev/null
@@ -1,131 +0,0 @@
-#ifndef VIENNACL_GENERATOR_MAKE_CODE_INNER_PRODUCT_HPP
-#define VIENNACL_GENERATOR_MAKE_CODE_INNER_PRODUCT_HPP
-
-/* =========================================================================
- Copyright (c) 2010-2012, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp at iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/generator/make_code/inner_product.hpp
- * @brief Directives for generating code for the inner product.
- *
- * Generator code contributed by Philippe Tillet
- */
-
-#include "viennacl/generator/make_code/expression.hpp"
-#include "viennacl/generator/meta_tools/utils.hpp"
-#include "viennacl/generator/compound_node.hpp"
-#include "viennacl/generator/traits/result_of.hpp"
-#include "viennacl/generator/tree_operations.hpp"
-
-
-namespace viennacl
-{
- namespace generator
- {
-
- template <class T>
- struct inner_prod_impl_t
- {
- typedef T PRIOR_TYPE;
-
- static const std::string name()
- {
- return T::name();
- }
-
- static const std::string kernel_arguments()
- {
- return T::kernel_arguments();
- }
- enum { id = T::id };
- };
-
- template <class TOKEN, class OP, class ASSIGNED, class Enable=void>
- struct make_code;
-
- template <class T, class OP, class ASSIGNED>
- struct make_code<inner_prod_impl_t<T>, OP, ASSIGNED>
- {
- private:
- typedef typename tree_utils::extract_if<T,is_pure_inner_product_leaf>::Result::Head ARG;
- typedef typename ARG::LHS LHS;
- typedef typename ARG::RHS RHS;
-
- static const std::string main_size()
- {
- return result_of::expression_type<LHS>::Result::internal_size_expression();
- }
-
- public :
-
- static const std::string value()
- {
- return "sum = 0;\n"
- "for (unsigned int k = (get_group_id(0) * " + main_size() + ")/get_num_groups(0)+ get_local_id(0); k < ((get_group_id(0)+1) * " + main_size() +")/get_num_groups(0); k += get_local_size(0))\n"
- " sum += " + dot_product<LHS,RHS>::value("k","k") + ";\n"
- "shared_memory_ptr[get_local_id(0)] = sum;\n"
-
- "for (unsigned int stride = get_local_size(0)/2; stride > 0; stride /= 2)\n"
- " {\n"
- " barrier(CLK_LOCAL_MEM_FENCE);\n"
- " if (get_local_id(0) < stride)\n"
- " shared_memory_ptr[get_local_id(0)] += shared_memory_ptr[get_local_id(0)+stride];\n"
- " }\n"
- "barrier(CLK_LOCAL_MEM_FENCE);\n"
- "if (get_local_id(0) == 0)\n"
- " " + ASSIGNED::name() + "[get_group_id(0)] = shared_memory_ptr[0];\n";
- }
-
- viennacl::generator::compound_node< const char*, add_type, const char* > value(const char* arg1);
- };
-
- template <class T, class OP>
- struct make_code<T, OP, T, typename enable_if<is_inner_product_leaf<T> >::type>
- {
- private:
- typedef typename tree_utils::extract_if<T,is_pure_inner_product_leaf>::Result::Head ARG;
- typedef typename ARG::LHS LHS;
- typedef typename ARG::RHS RHS;
-
- public:
-
- static const std::string value()
- {
- return "sum = 0;\n"
- "local float " + ARG::name() + "_sum;\n"
- "for (unsigned int i = get_local_id(0) ; i<get_num_groups(0) ; i+=get_local_size(0))\n"
- "{\n"
- " sum+= " +ARG::name() +"[i];\n"
- "};\n"
- "shared_memory_ptr[get_local_id(0)]=sum;\n"
- "for (unsigned int stride = get_local_size(0)/2; stride > 0; stride /= 2)\n"
- " {\n"
- " barrier(CLK_LOCAL_MEM_FENCE);\n"
- " if (get_local_id(0) < stride)\n"
- " shared_memory_ptr[get_local_id(0)] += shared_memory_ptr[get_local_id(0)+stride];\n"
- " }\n"
- "if(get_local_id(0)==0);\n"
- +ARG::name() + "_sum = shared_memory_ptr[0];\n"
- "barrier(CLK_LOCAL_MEM_FENCE);\n";
- }
- };
-
- }
-
-}
-
-#endif
-
-
diff --git a/viennacl/generator/make_code/make_code.hpp b/viennacl/generator/make_code/make_code.hpp
deleted file mode 100644
index d4ef013..0000000
--- a/viennacl/generator/make_code/make_code.hpp
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifndef VIENNACL_GENERATOR_MAKE_CODE_MAKE_CODE_HPP
-#define VIENNACL_GENERATOR_MAKE_CODE_MAKE_CODE_HPP
-
-/* =========================================================================
- Copyright (c) 2010-2012, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp at iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/generator/make_code/make_code.hpp
- * @brief Convenience header file for the code generation step
- *
- * Generator code contributed by Philippe Tillet
- */
-
-#include "viennacl/generator/make_code/inner_product.hpp"
-#include "viennacl/generator/make_code/matrix-vector_product.hpp"
-#include "viennacl/generator/make_code/regular_compound_node.hpp"
-#include "viennacl/generator/make_code/expression.hpp"
-
-#endif
-
-
diff --git a/viennacl/generator/make_code/matrix-vector_product.hpp b/viennacl/generator/make_code/matrix-vector_product.hpp
deleted file mode 100644
index 14d0dfa..0000000
--- a/viennacl/generator/make_code/matrix-vector_product.hpp
+++ /dev/null
@@ -1,143 +0,0 @@
-#ifndef VIENNACL_GENERATOR_MAKE_CODE_MATRIX_VECTOR_PRODUCT_HPP
-#define VIENNACL_GENERATOR_MAKE_CODE_MATRIX_VECTOR_PRODUCT_HPP
-
-/* =========================================================================
- Copyright (c) 2010-2012, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp at iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/generator/make_code/matrix-vector_product.hpp
- * @brief Directives for generating code for the matrix-vector product
- *
- * Generator code contributed by Philippe Tillet
- */
-
-
-#include "viennacl/generator/make_code/expression.hpp"
-#include "viennacl/generator/meta_tools/utils.hpp"
-#include "viennacl/generator/meta_tools/typelist.hpp"
-#include "viennacl/generator/compound_node.hpp"
-#include "viennacl/generator/traits/result_of.hpp"
-#include "viennacl/generator/tree_operations.hpp"
-
-namespace viennacl
-{
- namespace generator
- {
-
- template<class T, class OP, class ASSIGNED>
- struct make_product_code;
-
- template<class T, class SIZE_DESCRIPTOR, class OP, class ASSIGNED>
- struct make_product_code<result_of::vector_expression<T,SIZE_DESCRIPTOR>, OP, ASSIGNED>
- {
- private:
- typedef typename tree_utils::remove_if<T, is_pure_product_leaf>::Result SCALAR_EXPR;
- typedef typename tree_utils::extract_if<T,is_pure_product_leaf>::Result::Head ARG;
- typedef typename generate_tokens<compound_node<NullType,assign_type,SCALAR_EXPR>, false>::Result Tokens;
- typedef typename ARG::LHS LHS;
- typedef typename ARG::RHS RHS;
- typedef typename result_of::expression_type<LHS>::Result MatExpr;
- typedef typename MatExpr::ScalarType ScalarType;
- typedef typename MatExpr::Layout Layout;
-
- static const unsigned int Alignment = result_of::expression_type<LHS>::Result::Alignment;
-
- static const std::string assign_res(Int2Type<true>)
- {
- return ASSIGNED::name() + "[ row ]" + OP::expression_string() + "dot_prod ;";
- }
-
- static const std::string assign_res(Int2Type<false>)
- {
- return ASSIGNED::name() + "[ row ]" + OP::expression_string() + make_expression_code<SCALAR_EXPR>::value ( "k" ) + "* dot_prod ;";
- }
-
- static const std::string expression_string()
- {
- return make_expression_code<LHS>::value() + "*" + make_expression_code<RHS>::value();
- }
-
- static const std::string fill_ith_row(viennacl::row_major)
- {
- std::string internal_size_2_expression = MatExpr::internal_size2_expression();
- if(Alignment==1)
- return " dot_prod += " + dot_product<LHS,RHS>::value("row *" + internal_size_2_expression + " + col","col") + ";\n";
- else if (Alignment == 16)
- return " unsigned int scaled_row = row * " + to_string(Alignment) + ";\n"
- + "dot_prod.s0 += " + dot_product<LHS,RHS>::value("scaled_row *" + internal_size_2_expression + " + col","col") + ";\n"
- + " dot_prod.s1 += " + dot_product<LHS,RHS>::value("(scaled_row+1)*" + internal_size_2_expression + " + col","col") + ";\n"
- + " dot_prod.s2 += " + dot_product<LHS,RHS>::value("(scaled_row+2)*" + internal_size_2_expression + " + col","col") + ";\n"
- + " dot_prod.s3 += " + dot_product<LHS,RHS>::value("(scaled_row+3)*" + internal_size_2_expression + " + col","col") + ";\n"
- + " dot_prod.s4 += " + dot_product<LHS,RHS>::value("(scaled_row+4)*" + internal_size_2_expression + " + col","col") + ";\n"
- + " dot_prod.s5 += " + dot_product<LHS,RHS>::value("(scaled_row+5)*" + internal_size_2_expression + " + col","col") + ";\n"
- + " dot_prod.s6 += " + dot_product<LHS,RHS>::value("(scaled_row+6)*" + internal_size_2_expression + " + col","col") + ";\n"
- + " dot_prod.s7 += " + dot_product<LHS,RHS>::value("(scaled_row+7)*" + internal_size_2_expression + " + col","col") + ";\n"
- + " dot_prod.s8 += " + dot_product<LHS,RHS>::value("(scaled_row+8)*" + internal_size_2_expression + " + col","col") + ";\n"
- + " dot_prod.s9 += " + dot_product<LHS,RHS>::value("(scaled_row+9)*" + internal_size_2_expression + " + col","col") + ";\n"
- + " dot_prod.sa += " + dot_product<LHS,RHS>::value("(scaled_row+10)*" + internal_size_2_expression + " + col","col") + ";\n"
- + " dot_prod.sb += " + dot_product<LHS,RHS>::value("(scaled_row+11)*" + internal_size_2_expression + " + col","col") + ";\n"
- + " dot_prod.sc += " + dot_product<LHS,RHS>::value("(scaled_row+12)*" + internal_size_2_expression + " + col","col") + ";\n"
- + " dot_prod.sd += " + dot_product<LHS,RHS>::value("(scaled_row+13)*" + internal_size_2_expression + " + col","col") + ";\n"
- + " dot_prod.se += " + dot_product<LHS,RHS>::value("(scaled_row+14)*" + internal_size_2_expression + " + col","col") + ";\n"
- + " dot_prod.sf += " + dot_product<LHS,RHS>::value("(scaled_row+15)*" + internal_size_2_expression + " + col","col") + ";\n";
- else
- return "ALIGNMENT NOT IMPLEMENTED";
- }
-
- static const std::string fill_ith_row(viennacl::column_major)
- {
- std::string internal_size_1_expression = MatExpr::internal_size1_expression();
- VIENNACL_STATIC_ASSERT(Alignment==1);
- return " dot_prod += " + dot_product<LHS,RHS>::value("row + col * " + internal_size_1_expression, "col") + ";\n";
-
- // if(Alignment==1)
- // return " dot_prod += " + dot_product<LHS,RHS>::value("row + col * " + internal_size_1_expression, "col") + ";\n"; ;
- // else
- // return "ALIGNMENT NOT IMPLEMENTED";
- //
- }
-
- public:
- static const std::string value()
- {
- return
- "for (unsigned int row = get_global_id(0) ; row < " + MatExpr::internal_size1_expression() + " ; row += get_global_size(0))\n"
- "{\n"
- + print_type<ScalarType,Alignment>::value()+" dot_prod = 0;\n"
- "for (unsigned int col = 0; col < " + MatExpr::internal_size2_expression() + "; ++col){\n"
- + fill_ith_row(Layout() )
- + "}\n"
- + assign_res ( Int2Type<is_null_type<SCALAR_EXPR>::value>() ) + "\n"
- + "}\n";
- }
-
- };
-
- template <class T, class OP, class ASSIGNED>
- struct make_code<T, OP, ASSIGNED, typename enable_if<is_product_leaf<T> >::type>
- {
- static const std::string value()
- {
- typedef typename result_of::expression_type<T>::Result U;
- return make_product_code<U,OP,ASSIGNED>::value();
- }
- };
-
- }
-}
-
-#endif
-
-
diff --git a/viennacl/generator/make_code/regular_compound_node.hpp b/viennacl/generator/make_code/regular_compound_node.hpp
deleted file mode 100644
index bb13afd..0000000
--- a/viennacl/generator/make_code/regular_compound_node.hpp
+++ /dev/null
@@ -1,104 +0,0 @@
-#ifndef MAKE_CODE_REGULAR_COMPOUND_NODE_HPP
-#define MAKE_CODE_REGULAR_COMPOUND_NODE_HPP
-
-/* =========================================================================
- Copyright (c) 2010-2012, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp at iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/generator/make_code/regular_compound_node.hpp
- * @brief Directives for generating code for the matrix-vector product
- *
- * Generator code contributed by Philippe Tillet
- */
-
-#include "expression.hpp"
-#include "viennacl/generator/meta_tools/utils.hpp"
-#include "viennacl/generator/compound_node.hpp"
-#include "viennacl/generator/traits/result_of.hpp"
-#include "viennacl/generator/symbolic_types/symbolic_matrix.hpp"
-
-namespace viennacl
-{
- namespace generator
- {
-
- template <class T>
- struct get_loop_bound_impl;
-
- template <class T, class SIZE_DESCRIPTOR>
- struct get_loop_bound_impl<result_of::vector_expression<T, SIZE_DESCRIPTOR> >
- {
- static const std::string value()
- {
- return result_of::vector_expression<T, SIZE_DESCRIPTOR>::internal_size_expression();
- }
- };
-
- template <class T, class SIZE1_DESCRIPTOR, class SIZE2_DESCRIPTOR>
- struct get_loop_bound_impl<result_of::matrix_expression<T, SIZE1_DESCRIPTOR, SIZE2_DESCRIPTOR> >
- {
- private:
- typedef result_of::matrix_expression<T, SIZE1_DESCRIPTOR, SIZE2_DESCRIPTOR> Arg;
-
- public:
- static const std::string value()
- {
- return Arg::internal_size2_expression() + "*" + Arg::internal_size1_expression();
- }
- };
-
- template <class T>
- struct get_loop_bound
- {
- static const std::string value()
- {
- return get_loop_bound_impl<typename result_of::expression_type<T>::Result>::value();
- }
- };
-
- template <class T, class ASSIGN_OP, class ASSIGNED, class Enable>
- struct make_code
- {
- static const std::string value()
- {
- return "for ( unsigned int k = get_global_id(0)"
- " ; k < " + get_loop_bound<ASSIGNED>::value()
- +" ; k += get_global_size(0) ) \n"
- + "{\n"
- + make_expression_code<ASSIGNED>::value("k") + ASSIGN_OP::expression_string() + make_expression_code<T>::value("k") + ";\n"
- + "}\n";
- }
- };
-
- template<class T, class ASSIGN_OP, unsigned int ASSIGNED_ID, class ASSIGNED_TYPE>
- struct make_code<T, ASSIGN_OP, gpu_symbolic_scalar<ASSIGNED_ID,ASSIGNED_TYPE> >
- {
- private:
- typedef gpu_symbolic_scalar<ASSIGNED_ID,ASSIGNED_TYPE> ASSIGNED;
- public:
- static const std::string value()
- {
- return "if(get_global_id(0) == 0) "
- + make_expression_code<ASSIGNED>::value("0") + '\n'
- + ASSIGN_OP::expression_string() + make_expression_code<T>::value ( "k" ) + ";\n" ;
- }
- };
-
- }
-}
-
-#endif
-
-
diff --git a/viennacl/generator/meta_tools/typelist.hpp b/viennacl/generator/meta_tools/typelist.hpp
deleted file mode 100644
index a5f17bd..0000000
--- a/viennacl/generator/meta_tools/typelist.hpp
+++ /dev/null
@@ -1,386 +0,0 @@
-#ifndef VIENNACL_GENERATOR_META_TOOLS_TYPELIST_HPP
-#define VIENNACL_GENERATOR_META_TOOLS_TYPELIST_HPP
-
-/* =========================================================================
- Copyright (c) 2010-2012, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp at iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file typelist.hpp
- * @brief Generic implementation of the typelist
- *
- * Generator code contributed by Philippe Tillet
- */
-
-
-#include "viennacl/generator/meta_tools/utils.hpp"
-#include "viennacl/generator/traits/general_purpose_traits.hpp"
-
-
-namespace viennacl
-{
- namespace generator
- {
- template <class T,class U>
- struct typelist
- {
- typedef T Head;
- typedef U Tail;
-
- static const std::string name()
- {
- return Head::name() + " ; " + Tail::name();
- }
- };
-
- namespace typelist_utils
- {
-
- /*
- * Is Empty
- */
-
- template
- <
- typename T1 = NullType, typename T2 = NullType, typename T3 = NullType,
- typename T4 = NullType, typename T5 = NullType, typename T6 = NullType,
- typename T7 = NullType, typename T8 = NullType, typename T9 = NullType,
- typename T10 = NullType, typename T11 = NullType, typename T12 = NullType,
- typename T13 = NullType, typename T14 = NullType, typename T15 = NullType,
- typename T16 = NullType, typename T17 = NullType, typename T18 = NullType
- >
- struct make_typelist
- {
- private:
- typedef typename make_typelist
- <
- T2 , T3 , T4 ,
- T5 , T6 , T7 ,
- T8 , T9 , T10,
- T11, T12, T13,
- T14, T15, T16,
- T17, T18
- >
- ::Result TailResult;
-
- public:
- typedef typelist<T1, TailResult> Result;
- };
-
- template <>
- struct make_typelist<>
- {
- typedef NullType Result;
- };
-
- template <class TList>
- struct is_empty
- {
- enum { value = 0 };
- };
-
- template <>
- struct is_empty<NullType>
- {
- enum { value = 1 };
- };
-
-
- /*
- * FOREACH
- */
-
-
- template <class TList,template<class> class Functor>
- struct ForEach;
-
- template <template<class> class Functor>
- struct ForEach<NullType,Functor>
- {
- static void execute() {}
-
- template <class T1>
- static void execute(T1 & t1) {}
-
- template <class T1, class T2>
- static void execute(T1 & t1, T2 & t2) {}
-
- template <class T1, class T2, class T3>
- static void execute(T1 & t1, T2 & t2, T3 & t3) {}
-
- template <class T1, class T2, class T3, class T4>
- static void execute(T1 & t1, T2 & t2, T3 & t3, T4 & t4) {}
-
- template <class T1, class T2, class T3, class T4, class T5>
- static void execute(T1 & t1, T2 & t2, T3 & t3, T4 & t4, T5 & t5) {}
-
- };
-
- template <class T, class U,template<class> class Functor>
- struct ForEach< typelist<T, U>, Functor >
- {
- static void execute()
- {
- Functor<T>::execute();
- ForEach<U, Functor>::execute();
- }
-
- template <class T1>
- static void execute(T1 & t1)
- {
- Functor<T>::execute(t1);
- ForEach<U,Functor>::execute(t1);
- }
-
- template <class T1, class T2>
- static void execute(T1 & t1, T2 &t2)
- {
- Functor<T>::execute(t1,t2);
- ForEach<U,Functor>::execute(t1,t2);
- }
-
- template <class T1, class T2, class T3>
- static void execute(T1 & t1, T2 & t2, T3 & t3)
- {
- Functor<T>::execute(t1,t2,t3);
- ForEach<U,Functor>::execute(t1,t2,t3);
- }
-
- template <class T1, class T2, class T3, class T4>
- static void execute(T1 & t1, T2 & t2, T3 & t3, T4 & t4)
- {
- Functor<T>::execute(t1,t2,t3,t4);
- ForEach<U,Functor>::execute(t1,t2,t3,t4);
- }
-
- template <class T1, class T2, class T3, class T4, class T5>
- static void execute(T1 & t1, T2 & t2, T3 & t3, T4 & t4, T5 & t5)
- {
- Functor<T>::execute(t1,t2,t3,t4,t5);
- ForEach<U,Functor>::execute(t1,t2,t3,t4,t5);
- }
- };
-
-
- /*
- * length
- */
-
-
- template <class TList>
- struct length;
-
- template <>
- struct length<NullType>
- {
- enum { value = 0 };
- };
-
- template <class T, class U>
- struct length< typelist<T, U> >
- {
- enum { value = 1 + length<U>::value };
- };
-
- /*
- * type_at
- */
-
- template <class TList, unsigned int i>
- struct type_at;
-
- template <class Head, class Tail>
- struct type_at<typelist<Head, Tail>, 0>
- {
- typedef Head Result;
- };
-
- template <class Head, class Tail, unsigned int i>
- struct type_at<typelist<Head, Tail>, i>
- {
- typedef typename type_at<Tail, i - 1>::Result Result;
- };
-
- /*
- * index_of
- */
-
- template <class TList, class T>
- struct index_of;
-
- template <class T>
- struct index_of<NullType, T>
- {
- enum { value = -1 };
- };
-
- template <class T, class Tail>
- struct index_of<typelist<T, Tail>, T>
- {
- enum { value = 0 };
- };
-
- template <class Head, class Tail, class T>
- struct index_of<typelist<Head, Tail>, T>
- {
- private:
- enum { temp = index_of<Tail, T>::value };
-
- public:
- enum { value = temp == -1 ? -1 : 1 + temp };
- };
-
- /*
- * append
- */
-
- template <class T1, class T2>
- struct compare1
- {
- enum { value = static_cast<int> ( T1::id ) < static_cast<int> ( T2::id ) };
- };
-
- template <class T>
- struct compare1<NullType, T>
- {
- enum { value = 0 };
- };
-
-
- template <class T1, class T2>
- struct true_comp
- {
- enum { value = 1 };
- };
-
- template <class TList, class T, template<class,class> class Compare = true_comp>
- struct append;
-
- template <template<class,class> class Compare>
- struct append<NullType, NullType, Compare>
- {
- typedef NullType Result;
- };
-
- template <class T, template<class,class> class Compare>
- struct append<NullType, T, Compare>
- {
- typedef typelist<T,NullType> Result;
- };
-
- template <class Head, class Tail, template<class,class> class Compare>
- struct append<NullType, typelist<Head, Tail>, Compare >
- {
- typedef typelist<Head, Tail> Result;
- };
-
- template <class Head, class Tail, template<class,class> class Compare>
- struct append<typelist<Head, Tail>, NullType, Compare >
- {
- typedef typelist<Head, Tail> Result;
- };
-
- template <class Head, class Tail, class T, template<class,class> class Compare>
- struct append<typelist<Head,Tail>, T, Compare>
- {
- private:
- typedef typelist<Head, typename append<Tail, T, Compare>::Result> TypeCompareFalse;
- typedef typelist<T, typelist<Head,Tail> > TypeCompareTrue;
-
- public:
- typedef typename get_type_if<TypeCompareTrue,TypeCompareFalse,Compare<T,Head>::value >::Result Result;
- };
-
- /*
- * fuse
- */
-
- template <class TList, class T, template<class,class> class Compare = true_comp>
- struct fuse
- {
- typedef typename append<TList, T, Compare>::Result Result;
- };
-
- template <class Head1, class Tail1, class Head2, class Tail2, template<class,class> class Compare >
- struct fuse<typelist<Head1, Tail1>, typelist<Head2,Tail2>, Compare >
- {
- private:
- typedef typename append< typelist<Head1,Tail1> , Head2, Compare>::Result NewResult;
-
- public:
- typedef typename fuse< NewResult, Tail2, Compare >::Result Result;
- };
-
- /*
- * erase
- */
-
-
- template<class TList, class T>
- struct erase;
-
- template <class T>
- struct erase<NullType, T>
- {
- typedef NullType Result;
- };
-
- template <class T, class Tail>
- struct erase<typelist<T, Tail>, T>
- {
- typedef Tail Result;
- };
-
- template <class Head, class Tail, class T>
- struct erase<typelist<Head, Tail>, T>
- {
- typedef typelist<Head,
- typename erase<Tail, T>::Result> Result;
- };
-
- template <class Head, class Tail, class Head2, class Tail2>
- struct erase<typelist<Head, Tail>, typelist<Head2, Tail2> >
- {
- typedef typename erase< typename erase<typelist<Head,Tail>, Head2>::Result, Tail2 >::Result Result;
- };
-
- /*
- * No duplicate
- */
-
- template<class TList>
- struct no_duplicates;
-
- template <>
- struct no_duplicates<NullType>
- {
- typedef NullType Result;
- };
-
- template <class Head, class Tail>
- struct no_duplicates< typelist<Head, Tail> >
- {
- private:
- typedef typename no_duplicates<Tail>::Result L1;
- typedef typename erase<L1, Head>::Result L2;
-
- public:
- typedef typelist<Head, L2> Result;
- };
-
- }
- }
-}
-
-#endif
diff --git a/viennacl/generator/meta_tools/utils.hpp b/viennacl/generator/meta_tools/utils.hpp
deleted file mode 100644
index 28cf935..0000000
--- a/viennacl/generator/meta_tools/utils.hpp
+++ /dev/null
@@ -1,290 +0,0 @@
-#ifndef VIENNACL_GENERATOR_META_TOOLS_UTILS_HPP
-#define VIENNACL_GENERATOR_META_TOOLS_UTILS_HPP
-
-/* =========================================================================
- Copyright (c) 2010-2012, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp at iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/generator/meta_tools/utils.hpp
- * @brief Various metaprogramming utilities
- *
- * Generator code contributed by Philippe Tillet
- */
-
-
-#include <algorithm>
-#include <typeinfo>
-#include <iostream>
-#include "viennacl/matrix.hpp"
-
-#define VIENNACL_STATIC_ASSERT( x ) typedef char __STATIC_ASSERT__[( x )?1:-1]
-
-namespace viennacl
-{
-
- class any;
-
- template<class T>
- T any_cast(any& a);
-
- class value_base
- {
- public:
- virtual ~value_base() { }
- virtual value_base* clone() const = 0;
- virtual std::type_info const & type() const = 0;
- };
-
- template <class T>
- class value : public value_base
- {
- friend T any_cast<>(any& a);
-
- T t;
-
- public:
- value(const T& t_) : t(t_) { }
- value_base* clone() const
- {
- return new value(t);
- }
-
- std::type_info const &type() const {
- return typeid(T);
- }
- };
-
- class any
- {
- template<class T>
- friend T any_cast(any & a);
-
- value_base* v;
-
- public:
- any() : v(0) { }
-
- template <class value_type>
- any(const value_type& v_) : v(new value<value_type>(v_)) { }
-
- any(any const & other) : v(other.v ? other.v->clone() : 0) {}
-
- any& operator=(const any& other)
- {
- if(&other != this)
- {
- any copy(other);
- swap(copy);
- }
- return *this;
- }
-
- void swap(any& other)
- {
- std::swap(v, other.v);
- }
-
- std::type_info const & type()
- {
- return v->type();
- }
-
- ~any() { delete v; }
- };
-
- class bad_any_cast : public std::bad_cast
- {
- public:
- virtual const char * what() const throw()
- {
- return "viennacl::bad_any_cast: "
- "failed conversion using viennacl::any_cast";
- }
- };
-
- template <class T>
- T any_cast(any& a)
- {
- value<T>* v = dynamic_cast<value<T>*>(a.v);
-
- if(v == 0)
- throw bad_any_cast();
- else
- return v->t;
- }
-
-
- namespace generator
- {
- struct NullType
- {
- static const std::string name()
- {
- return "Null\n" ;
- }
- };
-
- template <class T>
- inline std::string to_string ( T const t )
- {
- std::stringstream ss;
- ss << t;
- return ss.str();
- }
-
- inline std::string to_string(viennacl::row_major const) { return "rowmajor"; }
- inline std::string to_string(viennacl::column_major const) { return "columnmajor"; }
-
-
- template <int v>
- struct Int2Type
- {
- enum { value = v };
- };
-
- template<class TypeTrue, class TypeFalse, bool cond>
- struct get_type_if
- {
- typedef TypeTrue Result;
- };
-
- template<class TypeTrue, class TypeFalse>
- struct get_type_if<TypeTrue, TypeFalse, false>
- {
- typedef TypeFalse Result;
- };
-
- template<class T, class U>
- struct are_same_type
- {
- enum { value = 0 };
- };
-
- template<class T>
- struct are_same_type<T,T>
- {
- enum { value = 1 };
- };
-
-
-
- template <bool B, class T = void>
- struct enable_if_c
- {
- typedef T type;
- };
-
- template <class T>
- struct enable_if_c<false, T> {};
-
- template <class Cond, class T = void>
- struct enable_if : public enable_if_c<Cond::value, T> {};
-
-
- template <bool B, class T = void>
- struct disable_if_c
- {
- typedef T type;
- };
-
- template <class T>
- struct disable_if_c<true, T> {};
-
- template <class Cond, class T = void>
- struct disable_if : public disable_if_c<Cond::value, T> {};
-
-
-
- template<class T>
- struct print_align1_type;
-
- template<>
- struct print_align1_type<int>
- {
- static const std::string value() { return "int"; }
- };
-
- template<>
- struct print_align1_type<unsigned int>
- {
- static const std::string value() { return "unsigned int"; }
- };
-
- template<>
- struct print_align1_type<long>
- {
- static const std::string value() { return "long"; }
- };
-
- template<>
- struct print_align1_type<unsigned long>
- {
- static const std::string value() { return "long"; }
- };
-
- template<>
- struct print_align1_type<float>
- {
- static const std::string value() { return "float"; }
- };
-
- template<>
- struct print_align1_type<double>
- {
- static const std::string value() { return "double"; }
- };
-
- template<typename T, unsigned int ALIGNMENT>
- struct print_aligned_type
- {
- static const std::string value()
- {
- return print_align1_type<T>::value() + to_string ( ALIGNMENT );
- }
- };
-
- template<typename T>
- struct print_aligned_type<T, 1>
- {
- static const std::string value()
- {
- return print_align1_type<T>::value();
- }
- };
-
- template<typename T, unsigned int ALIGNMENT>
- struct print_type
- {
- static const std::string value()
- {
- return print_aligned_type<T,ALIGNMENT>::value();
- }
- };
-
- template<typename T, unsigned int ALIGNMENT>
- struct print_type<T*, ALIGNMENT>
- {
- static const std::string value()
- {
- return print_type<T,ALIGNMENT>::value() + "*" ;
- }
- };
-
- }
-}
-
-#endif
-
-
diff --git a/viennacl/generator/operation_types.hpp b/viennacl/generator/operation_types.hpp
deleted file mode 100644
index f946a62..0000000
--- a/viennacl/generator/operation_types.hpp
+++ /dev/null
@@ -1,130 +0,0 @@
-#ifndef VIENNACL_GENERATOR_OPERATION_TYPES_HPP
-#define VIENNACL_GENERATOR_OPERATION_TYPES_HPP
-
-/* =========================================================================
- Copyright (c) 2010-2012, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp at iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/generator/operation_types.hpp
- * @brief Declaration of the types related to the operators
- */
-
-#include <sstream>
-
-namespace viennacl
-{
- namespace generator
- {
-
- struct assign_type
- {
- static const std::string expression_string() { return " = "; }
- static const std::string name() { return "eq"; }
- };
-
- struct add_type
- {
- static const std::string expression_string() { return " + "; }
- static const std::string name() { return "p"; }
- };
-
- struct inplace_add_type
- {
- static const std::string expression_string() { return " += "; }
- static const std::string name() { return "p_eq"; }
- };
-
- struct sub_type
- {
- static const std::string expression_string() { return " - "; }
- static const std::string name() { return "m"; }
- };
-
- struct inplace_sub_type
- {
- static const std::string expression_string() { return " -= "; }
- static const std::string name() { return "m_eq"; }
- };
-
- struct scal_mul_type
- {
- static const std::string expression_string() { return " * "; }
- static const std::string name() { return "mu"; }
- };
-
- struct inplace_scal_mul_type
- {
- static const std::string expression_string() { return " *= "; }
- static const std::string name() { return "mu_eq"; }
- };
-
-
- struct scal_div_type
- {
- static const std::string expression_string() { return " / "; }
- static const std::string name() { return "d"; }
- };
-
- struct inplace_scal_div_type
- {
- static const std::string expression_string() { return " /= "; }
- static const std::string name() { return "d_eq"; }
- };
-
- struct inner_prod_type
- {
- static const std::string expression_string() { return "_i_"; }
- static const std::string name() { return "i"; }
- };
-
- struct prod_type
- {
- static const std::string expression_string() { return "_p_"; }
- static const std::string name() { return "p"; }
- };
-
- template<class T>
- struct make_inplace
- {
- typedef T Result;
- };
-
- template<>
- struct make_inplace<add_type>
- {
- typedef inplace_add_type Result;
- };
-
- template<>
- struct make_inplace<sub_type>
- {
- typedef inplace_sub_type Result;
- };
-
- template<>
- struct make_inplace<scal_mul_type>
- {
- typedef inplace_scal_mul_type Result;
- };
-
- template<>
- struct make_inplace<scal_div_type>
- {
- typedef inplace_scal_div_type Result;
- };
-
- }
-}
-#endif
diff --git a/viennacl/generator/symbolic_types/convenience_typedef.hpp b/viennacl/generator/symbolic_types/convenience_typedef.hpp
deleted file mode 100644
index 4929f3e..0000000
--- a/viennacl/generator/symbolic_types/convenience_typedef.hpp
+++ /dev/null
@@ -1,176 +0,0 @@
-#ifndef VIENNACL_GENERATOR_SYMBOLIC_TYPES_CONVENIENCE_TYPEDEF_HPP
-#define VIENNACL_GENERATOR_SYMBOLIC_TYPES_CONVENIENCE_TYPEDEF_HPP
-
-/* =========================================================================
- Copyright (c) 2010-2012, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp at iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file convenience_typedef.hpp
- * @brief Convenience typedefs for quick creation of symbolic types
- *
- * Generator code contributed by Philippe Tillet
- */
-
-#include "viennacl/generator/symbolic_types/symbolic_vector.hpp"
-#include "viennacl/generator/symbolic_types/symbolic_matrix.hpp"
-#include "viennacl/generator/symbolic_types/symbolic_scalars.hpp"
-
-namespace viennacl
-{
- namespace generator
- {
-
- //Symbolic vectors : float
-
- typedef symbolic_vector<0,float,1> symv_0_f;
- typedef symbolic_vector<1,float,1> symv_1_f;
- typedef symbolic_vector<2,float,1> symv_2_f;
- typedef symbolic_vector<3,float,1> symv_3_f;
- typedef symbolic_vector<4,float,1> symv_4_f;
- typedef symbolic_vector<5,float,1> symv_5_f;
- typedef symbolic_vector<6,float,1> symv_6_f;
-
- typedef symbolic_vector<0,float,4> symv_0_f_4;
- typedef symbolic_vector<1,float,4> symv_1_f_4;
- typedef symbolic_vector<2,float,4> symv_2_f_4;
- typedef symbolic_vector<3,float,4> symv_3_f_4;
- typedef symbolic_vector<4,float,4> symv_4_f_4;
- typedef symbolic_vector<5,float,4> symv_5_f_4;
- typedef symbolic_vector<6,float,4> symv_6_f_4;
-
- typedef symbolic_vector<0,float,16> symv_0_f_16;
- typedef symbolic_vector<1,float,16> symv_1_f_16;
- typedef symbolic_vector<2,float,16> symv_2_f_16;
- typedef symbolic_vector<3,float,16> symv_3_f_16;
- typedef symbolic_vector<4,float,16> symv_4_f_16;
- typedef symbolic_vector<5,float,16> symv_5_f_16;
- typedef symbolic_vector<6,float,16> symv_6_f_16;
-
-
- //Symbolic vectors : double
-
- typedef symbolic_vector<0,double,1> symv_0_d;
- typedef symbolic_vector<1,double,1> symv_1_d;
- typedef symbolic_vector<2,double,1> symv_2_d;
- typedef symbolic_vector<3,double,1> symv_3_d;
- typedef symbolic_vector<4,double,1> symv_4_d;
- typedef symbolic_vector<5,double,1> symv_5_d;
- typedef symbolic_vector<6,double,1> symv_6_d;
-
- typedef symbolic_vector<0,double,4> symv_0_d_4;
- typedef symbolic_vector<1,double,4> symv_1_d_4;
- typedef symbolic_vector<2,double,4> symv_2_d_4;
- typedef symbolic_vector<3,double,4> symv_3_d_4;
- typedef symbolic_vector<4,double,4> symv_4_d_4;
- typedef symbolic_vector<5,double,4> symv_5_d_4;
- typedef symbolic_vector<6,double,4> symv_6_d_4;
-
- typedef symbolic_vector<0,double,16> symv_0_d_16;
- typedef symbolic_vector<1,double,16> symv_1_d_16;
- typedef symbolic_vector<2,double,16> symv_2_d_16;
- typedef symbolic_vector<3,double,16> symv_3_d_16;
- typedef symbolic_vector<4,double,16> symv_4_d_16;
- typedef symbolic_vector<5,double,16> symv_5_d_16;
- typedef symbolic_vector<6,double,16> symv_6_d_16;
-
-
- //Symbolic matrices : float
-
- typedef symbolic_matrix<0,float,viennacl::row_major,1> symm_0_f;
- typedef symbolic_matrix<1,float,viennacl::row_major,1> symm_1_f;
- typedef symbolic_matrix<2,float,viennacl::row_major,1> symm_2_f;
- typedef symbolic_matrix<3,float,viennacl::row_major,1> symm_3_f;
- typedef symbolic_matrix<4,float,viennacl::row_major,1> symm_4_f;
- typedef symbolic_matrix<5,float,viennacl::row_major,1> symm_5_f;
- typedef symbolic_matrix<6,float,viennacl::row_major,1> symm_6_f;
-
- typedef symbolic_matrix<0,float,viennacl::row_major,16> symm_0_f_r_16;
- typedef symbolic_matrix<1,float,viennacl::row_major,16> symm_1_f_r_16;
- typedef symbolic_matrix<2,float,viennacl::row_major,16> symm_2_f_r_16;
- typedef symbolic_matrix<3,float,viennacl::row_major,16> symm_3_f_r_16;
- typedef symbolic_matrix<4,float,viennacl::row_major,16> symm_4_f_r_16;
- typedef symbolic_matrix<5,float,viennacl::row_major,16> symm_5_f_r_16;
- typedef symbolic_matrix<6,float,viennacl::row_major,16> symm_6_f_r_16;
-
-
- //Symbolic matrices : double
-
- typedef symbolic_matrix<0,double,viennacl::row_major,1> symm_0_d;
- typedef symbolic_matrix<1,double,viennacl::row_major,1> symm_1_d;
- typedef symbolic_matrix<2,double,viennacl::row_major,1> symm_2_d;
- typedef symbolic_matrix<3,double,viennacl::row_major,1> symm_3_d;
- typedef symbolic_matrix<4,double,viennacl::row_major,1> symm_4_d;
- typedef symbolic_matrix<5,double,viennacl::row_major,1> symm_5_d;
- typedef symbolic_matrix<6,double,viennacl::row_major,1> symm_6_d;
-
- typedef symbolic_matrix<0,double,viennacl::row_major,16> symm_0_d_r_16;
- typedef symbolic_matrix<1,double,viennacl::row_major,16> symm_1_d_r_16;
- typedef symbolic_matrix<2,double,viennacl::row_major,16> symm_2_d_r_16;
- typedef symbolic_matrix<3,double,viennacl::row_major,16> symm_3_d_r_16;
- typedef symbolic_matrix<4,double,viennacl::row_major,16> symm_4_d_r_16;
- typedef symbolic_matrix<5,double,viennacl::row_major,16> symm_5_d_r_16;
- typedef symbolic_matrix<6,double,viennacl::row_major,16> symm_6_d_r_16;
-
-
- //CPU Symbolic scalar: float
-
- typedef cpu_symbolic_scalar<0,float> c_syms_0_f;
- typedef cpu_symbolic_scalar<1,float> c_syms_1_f;
- typedef cpu_symbolic_scalar<2,float> c_syms_2_f;
- typedef cpu_symbolic_scalar<3,float> c_syms_3_f;
- typedef cpu_symbolic_scalar<4,float> c_syms_4_f;
- typedef cpu_symbolic_scalar<5,float> c_syms_5_f;
- typedef cpu_symbolic_scalar<6,float> c_syms_6_f;
-
-
- //CPU Symbolic scalar: double
-
- typedef cpu_symbolic_scalar<0,double> c_syms_0_d;
- typedef cpu_symbolic_scalar<1,double> c_syms_1_d;
- typedef cpu_symbolic_scalar<2,double> c_syms_2_d;
- typedef cpu_symbolic_scalar<3,double> c_syms_3_d;
- typedef cpu_symbolic_scalar<4,double> c_syms_4_d;
- typedef cpu_symbolic_scalar<5,double> c_syms_5_d;
- typedef cpu_symbolic_scalar<6,double> c_syms_6_d;
-
-
- //GPU Symbolic scalar: float
-
- typedef gpu_symbolic_scalar<0,float> syms_0_f;
- typedef gpu_symbolic_scalar<1,float> syms_1_f;
- typedef gpu_symbolic_scalar<2,float> syms_2_f;
- typedef gpu_symbolic_scalar<3,float> syms_3_f;
- typedef gpu_symbolic_scalar<4,float> syms_4_f;
- typedef gpu_symbolic_scalar<5,float> syms_5_f;
- typedef gpu_symbolic_scalar<6,float> syms_6_f;
-
-
- //GPU Symbolic scalar: double
-
- typedef gpu_symbolic_scalar<0,double> syms_0_d;
- typedef gpu_symbolic_scalar<1,double> syms_1_d;
- typedef gpu_symbolic_scalar<2,double> syms_2_d;
- typedef gpu_symbolic_scalar<3,double> syms_3_d;
- typedef gpu_symbolic_scalar<4,double> syms_4_d;
- typedef gpu_symbolic_scalar<5,double> syms_5_d;
- typedef gpu_symbolic_scalar<6,double> syms_6_d;
-
-
- }
-}
-
-#endif
-
-
diff --git a/viennacl/generator/symbolic_types/symbolic_matrix.hpp b/viennacl/generator/symbolic_types/symbolic_matrix.hpp
deleted file mode 100644
index 3f474f6..0000000
--- a/viennacl/generator/symbolic_types/symbolic_matrix.hpp
+++ /dev/null
@@ -1,156 +0,0 @@
-#ifndef VIENNACL_GENERATOR_SYMBOLIC_TYPES_SYMBOLIC_MATRIX_HPP
-#define VIENNACL_GENERATOR_SYMBOLIC_TYPES_SYMBOLIC_MATRIX_HPP
-
-/* =========================================================================
- Copyright (c) 2010-2012, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp at iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/generator/symbolic_types/symbolic_matrix.hpp
- * @brief Implementation of a symbolic matrix type
- *
- * Generator code contributed by Philippe Tillet
- */
-
-
-
-#include "viennacl/forwards.h"
-#include "viennacl/generator/traits/general_purpose_traits.hpp"
-#include "viennacl/generator/traits/result_of.hpp"
-#include "viennacl/generator/compound_node.hpp"
-#include "viennacl/generator/meta_tools/utils.hpp"
-
-namespace viennacl
-{
- namespace generator
- {
-
- /**
- * @brief Symbolic matrix type
- *
- * @tparam ID The argument ID of the matrix in the generated code
- * @tparam SCALARTYPE The Scalartype of the matrix in the generated code
- * @tparam F The Layout of the matrix in the generated code
- * @tparam ALIGNMENT The Alignment of the matrix in the generated code
- */
- template<unsigned int ID, typename SCALARTYPE, class F, unsigned int ALIGNMENT>
- class symbolic_matrix
- {
- typedef symbolic_matrix<ID, SCALARTYPE, F, ALIGNMENT> self_type;
-
- public:
-
- enum { id = ID };
-
- typedef SCALARTYPE ScalarType;
-
- typedef F Layout;
-
- static const unsigned int Alignment = ALIGNMENT;
-
- typedef viennacl::matrix<ScalarType,F,Alignment> runtime_type;
-
- static const std::string name()
- {
- F layout;
- return "m_a_" + viennacl::generator::to_string(layout) + "_"
- + viennacl::generator::to_string(Alignment) + "_"
- + viennacl::generator::to_string<long>(id);
- }
-
- static const std::string size1_name()
- {
- return "size1_" + name();
- }
-
- static const std::string size2_name()
- {
- return "size2_" + name();
- }
-
- static const std::string internal_size1_name()
- {
- return "internal_size1_" + name();
- }
-
- static const std::string internal_size2_name()
- {
- return "internal_size2_" + name();
- }
-
- static const std::string kernel_arguments()
- {
- return " __global " + generator::print_type<SCALARTYPE*,Alignment>::value() + " " + name()
- + ", unsigned int " + size1_name()
- + ", unsigned int " + size2_name()
- + ", unsigned int " + internal_size1_name()
- + ", unsigned int " + internal_size2_name()
- + "\n";
- }
-
- template<typename RHS_TYPE>
- typename enable_if<generator::is_same_expression_type<self_type,RHS_TYPE>,
- compound_node<self_type, assign_type, RHS_TYPE > >::type
- operator= ( RHS_TYPE const & rhs ) const
- {
- return compound_node<self_type,assign_type,RHS_TYPE >();
- }
-
- template<typename RHS_TYPE>
- typename enable_if<generator::is_scalar_expression<RHS_TYPE>,
- compound_node<self_type, inplace_scal_mul_type, RHS_TYPE > >::type
- operator*= ( RHS_TYPE const & rhs ) const
- {
- return compound_node<self_type,inplace_scal_mul_type,RHS_TYPE >();
- }
-
- template<typename RHS_TYPE>
- typename enable_if<generator::is_scalar_expression<RHS_TYPE>,
- compound_node<self_type, inplace_scal_div_type, RHS_TYPE > >::type
- operator/= ( RHS_TYPE const & rhs ) const
- {
- return compound_node<self_type,inplace_scal_div_type,RHS_TYPE >();
- }
-
- template<typename RHS_TYPE>
- typename enable_if<generator::is_same_expression_type<self_type,RHS_TYPE>,
- compound_node<self_type, inplace_add_type, RHS_TYPE > >::type
- operator+= ( RHS_TYPE const & rhs ) const
- {
- return compound_node<self_type,inplace_add_type,RHS_TYPE >();
- }
-
- template<typename RHS_TYPE>
- typename enable_if<generator::is_same_expression_type<self_type,RHS_TYPE>,
- compound_node<self_type, inplace_sub_type, RHS_TYPE > >::type
- operator-= ( RHS_TYPE const & rhs ) const
- {
- return compound_node<self_type,inplace_sub_type,RHS_TYPE >();
- }
-
- operator compound_node<self_type,assign_type,self_type>()
- {
- return compound_node<self_type,assign_type,self_type>();
- }
- };
-
- template<unsigned int ID,typename SCALARTYPE, class F, unsigned int ALIGNMENT>
- class tmp_symbolic_matrix<symbolic_matrix<ID,SCALARTYPE,F,ALIGNMENT> > {};
-
- } // namespace generator
-} // namespace viennacl
-
-#endif
-
-
diff --git a/viennacl/generator/symbolic_types/symbolic_scalars.hpp b/viennacl/generator/symbolic_types/symbolic_scalars.hpp
deleted file mode 100644
index 0776da1..0000000
--- a/viennacl/generator/symbolic_types/symbolic_scalars.hpp
+++ /dev/null
@@ -1,176 +0,0 @@
-#ifndef VIENNACL_GENERATOR_SYMBOLIC_TYPES_SYMBOLIC_SCALARS_HPP
-#define VIENNACL_GENERATOR_SYMBOLIC_TYPES_SYMBOLIC_SCALARS_HPP
-
-/* =========================================================================
- Copyright (c) 2010-2012, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp at iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/generator/symbolic_types/symbolic_scalars.hpp
- * @brief Implementation of the symbolic scalar types.
- *
- * Generator code contributed by Philippe Tillet
- */
-
-
-#include "viennacl/scalar.hpp"
-#include "viennacl/generator/compound_node.hpp"
-#include "viennacl/generator/traits/general_purpose_traits.hpp"
-#include "viennacl/generator/traits/result_of.hpp"
-
-namespace viennacl
-{
- namespace generator
- {
-
- ///////////////////////////////////////
- /////// REGULAR SYM SCALARS //////////
- //////////////////////////////////////
-
- /**
- * @brief Symbolic scalar type. Will be passed by value.
- *
- * @tparam ID The argument ID of the scalar in the generated code
- * @tparam SCALARTYPE The Scalartype of the scalar in the generated code
- */
- template <unsigned int ID, typename SCALARTYPE>
- class cpu_symbolic_scalar
- {
- private:
- typedef cpu_symbolic_scalar<ID,SCALARTYPE> self_type;
-
- public:
-
- typedef SCALARTYPE ScalarType;
-
- typedef ScalarType runtime_type;
-
- enum { id = ID };
-
- static const std::string name()
- {
- std::ostringstream oss;
- oss << "c_s" << ID ;
- return oss.str();
- }
-
- static const std::string kernel_arguments()
- {
- return print_type<SCALARTYPE,1>::value() + " " + name() + "\n";
- }
- };
-
- /**
- * @brief Symbolic scalar type. Will be passed by pointer.
- *
- * @tparam ID The argument ID of the scalar in the generated code
- * @tparam SCALARTYPE The Scalartype of the scalar in the generated code
- */
- template <unsigned int ID, typename SCALARTYPE>
- class gpu_symbolic_scalar
- {
- private:
- typedef gpu_symbolic_scalar<ID,SCALARTYPE> self_type;
-
- public:
-
- typedef SCALARTYPE ScalarType;
-
- typedef viennacl::scalar<ScalarType> runtime_type;
-
- enum { id = ID };
-
- static const std::string name()
- {
- std::ostringstream oss;
- oss << "g_s" << ID ;
- return oss.str();
- }
-
- static const std::string kernel_arguments()
- {
- return "__global " + print_type<SCALARTYPE*,1>::value() + " " + name() + "\n" ;
- }
-
- template<typename RHS_TYPE>
- typename enable_if<is_same_expression_type<self_type,RHS_TYPE>,
- compound_node<self_type, assign_type, RHS_TYPE > >::type
- operator= ( RHS_TYPE const & rhs ) const
- {
- return compound_node<self_type,assign_type,RHS_TYPE >();
- }
-
- template<typename RHS_TYPE>
- typename enable_if<is_scalar_expression<RHS_TYPE>,
- compound_node<self_type, inplace_scal_mul_type, RHS_TYPE > >::type
- operator*= ( RHS_TYPE const & rhs ) const
- {
- return compound_node<self_type,inplace_scal_mul_type,RHS_TYPE >();
- }
-
- template<typename RHS_TYPE>
- typename enable_if<is_scalar_expression<RHS_TYPE>,
- compound_node<self_type, inplace_scal_div_type, RHS_TYPE > >::type
- operator/= ( RHS_TYPE const & rhs ) const
- {
- return compound_node<self_type,inplace_scal_div_type,RHS_TYPE >();
- }
-
- template<typename RHS_TYPE>
- typename enable_if<is_same_expression_type<self_type,RHS_TYPE>,
- compound_node<self_type, inplace_add_type, RHS_TYPE > >::type
- operator+= ( RHS_TYPE const & rhs ) const
- {
- return compound_node<self_type,inplace_add_type,RHS_TYPE >();
- }
-
- template<typename RHS_TYPE>
- typename enable_if<is_same_expression_type<self_type,RHS_TYPE>,
- compound_node<self_type, inplace_sub_type, RHS_TYPE > >::type
- operator-= ( RHS_TYPE const & rhs ) const
- {
- return compound_node<self_type,inplace_sub_type,RHS_TYPE >();
- }
-
- operator compound_node<self_type,assign_type,self_type>()
- {
- return compound_node<self_type,assign_type,self_type>();
- }
- };
-
- ///////////////////////////////////////
- ///////// SCALAR MULTIPLICATION ///////
- //////////////////////////////////////
-
- /** @brief Scalar multiplication operator */
- template<class LHS_TYPE, class RHS_TYPE>
- typename enable_if_c<is_scalar_expression<LHS_TYPE>::value || is_scalar_expression<RHS_TYPE>::value,
- compound_node<LHS_TYPE,scal_mul_type,RHS_TYPE> >::type
- operator* ( LHS_TYPE const & lhs, RHS_TYPE const & rhs )
- {
- return compound_node<LHS_TYPE, scal_mul_type,RHS_TYPE> ();
- }
-
- /** @brief Scalar division operator */
- template<class LHS_TYPE, class RHS_TYPE>
- typename enable_if_c< is_scalar_expression<RHS_TYPE>::value,
- compound_node<LHS_TYPE,scal_div_type,RHS_TYPE> > ::type
- operator/ ( LHS_TYPE const & lhs, RHS_TYPE const & rhs )
- {
- return compound_node<LHS_TYPE,scal_div_type,RHS_TYPE> ();
- }
-
- }
-}
-#endif
diff --git a/viennacl/generator/symbolic_types/symbolic_vector.hpp b/viennacl/generator/symbolic_types/symbolic_vector.hpp
deleted file mode 100644
index b4c1a16..0000000
--- a/viennacl/generator/symbolic_types/symbolic_vector.hpp
+++ /dev/null
@@ -1,179 +0,0 @@
-#ifndef VIENNACL_GENERATOR_SYMBOLIC_TYPES_SYMBOLIC_VECTOR_HPP
-#define VIENNACL_GENERATOR_SYMBOLIC_TYPES_SYMBOLIC_VECTOR_HPP
-
-/* =========================================================================
- Copyright (c) 2010-2012, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp at iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/generator/symbolic_types/symbolic_vector.hpp
- * @brief Implementation of a symbolic vector type
- *
- * Generator code contributed by Philippe Tillet
- */
-
-
-#include "viennacl/vector.hpp"
-#include "viennacl/generator/compound_node.hpp"
-#include "viennacl/generator/traits/general_purpose_traits.hpp"
-#include "viennacl/generator/traits/result_of.hpp"
-
-namespace viennacl
-{
- namespace generator
- {
-
- /**
- * @brief Symbolic vector type
- *
- * @tparam ID The argument ID of the vector in the generated code
- * @tparam SCALARTYPE The Scalartype of the vector in the generated code
- * @tparam ALIGNMENT The Alignment of the vector in the generated code
- */
- template <unsigned int ID, typename SCALARTYPE, unsigned int ALIGNMENT>
- class symbolic_vector
- {
- private:
- typedef symbolic_vector<ID,SCALARTYPE,ALIGNMENT> self_type;
-
- public:
-
- typedef SCALARTYPE ScalarType;
-
- static const unsigned int Alignment = ALIGNMENT;
-
- typedef viennacl::vector<ScalarType,Alignment> runtime_type;
-
- static const unsigned int id = ID;
-
- static const std::string name()
- {
- return "v_a" + to_string(Alignment) + "_" + to_string(ID);
- }
-
- static const std::string size2_name()
- {
- return "size_"+name();
- }
-
- static const std::string internal_size2_name()
- {
- return "internal_size_"+name();
- }
-
- static const std::string name_argument()
- {
- return " __global " + print_type<SCALARTYPE*,Alignment>::value() + " " + name();
- }
-
- static const std::string kernel_arguments()
- {
- return " __global " + print_type<SCALARTYPE*,Alignment>::value() + " " + name()
- + ", unsigned int " + size2_name()
- + ", unsigned int " + internal_size2_name() + "\n" ;
- }
-
- template<typename RHS_TYPE>
- typename enable_if<is_same_expression_type<self_type,RHS_TYPE>,
- compound_node<self_type, assign_type, RHS_TYPE > >::type
- operator= ( RHS_TYPE const & rhs ) const
- {
- return compound_node<self_type,assign_type,RHS_TYPE >();
- }
-
- template<typename RHS_TYPE>
- typename enable_if<is_scalar_expression<RHS_TYPE>,
- compound_node<self_type, inplace_scal_mul_type, RHS_TYPE > >::type
- operator*= ( RHS_TYPE const & rhs ) const
- {
- return compound_node<self_type,inplace_scal_mul_type,RHS_TYPE >();
- }
-
- template<typename RHS_TYPE>
- typename enable_if<is_scalar_expression<RHS_TYPE>,
- compound_node<self_type, inplace_scal_div_type, RHS_TYPE > >::type
- operator/= ( RHS_TYPE const & rhs ) const
- {
- return compound_node<self_type,inplace_scal_div_type,RHS_TYPE >();
- }
-
- template<typename RHS_TYPE>
- typename enable_if<is_same_expression_type<self_type,RHS_TYPE>,
- compound_node<self_type, inplace_add_type, RHS_TYPE > >::type
- operator+= ( RHS_TYPE const & rhs ) const
- {
- return compound_node<self_type,inplace_add_type,RHS_TYPE >();
- }
-
- template<typename RHS_TYPE>
- typename enable_if<is_same_expression_type<self_type,RHS_TYPE>,
- compound_node<self_type, inplace_sub_type, RHS_TYPE > >::type
- operator-= ( RHS_TYPE const & rhs ) const
- {
- return compound_node<self_type,inplace_sub_type,RHS_TYPE >();
- }
-
- operator compound_node<self_type,assign_type,self_type>()
- {
- return compound_node<self_type,assign_type,self_type>();
- }
- };
-
- template< unsigned int ID,class SCALARTYPE,unsigned int ALIGNMENT>
- class tmp_symbolic_vector<symbolic_vector<ID,SCALARTYPE,ALIGNMENT> >
- {
- typedef symbolic_vector<ID,SCALARTYPE,ALIGNMENT> ARG;
-
- public:
- typedef SCALARTYPE ScalarType;
-
- typedef typename symbolic_vector<ID,SCALARTYPE,ALIGNMENT>::runtime_type runtime_type;
-
- static const unsigned int Alignment = ALIGNMENT;
-
- static const unsigned int id = ID;
-
-
- static const std::string name()
- {
- return "tmp_" + ARG::name();
- }
-
- static const std::string size2_name()
- {
- return "size_"+name();
- }
-
- static const std::string internal_size2_name()
- {
- return "internal_size_"+name();
- }
-
- static const std::string name_argument()
- {
- return " __global " + print_type<SCALARTYPE*,Alignment>::value() + " " + name();
- }
-
- static const std::string kernel_arguments()
- {
- return " __global " + print_type<SCALARTYPE*,Alignment>::value() + " " + name()
- + ", unsigned int " + size2_name()
- + ", unsigned int " + internal_size2_name() + "\n" ;
- }
- };
- }
-}
-
-#endif
-
diff --git a/viennacl/generator/tokens_management.hpp b/viennacl/generator/tokens_management.hpp
deleted file mode 100644
index 5a45597..0000000
--- a/viennacl/generator/tokens_management.hpp
+++ /dev/null
@@ -1,107 +0,0 @@
-#ifndef VIENNACL_GENERATOR_TOKENS_MANAGEMENT_HPP
-#define VIENNACL_GENERATOR_TOKENS_MANAGEMENT_HPP
-
-/* =========================================================================
- Copyright (c) 2010-2012, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp at iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/generator/tokens_management.hpp
- * @brief Creation and management of the tokens list
- *
- * Generator code contributed by Philippe Tillet
- */
-
-
-#include "viennacl/generator/compound_node.hpp"
-#include "viennacl/generator/operation_types.hpp"
-#include "viennacl/generator/tree_operations.hpp"
-#include "viennacl/generator/meta_tools/typelist.hpp"
-
-namespace viennacl
-{
- namespace generator
- {
-
- /////////////////////////////
- ///////// TOKENS ///////////
- ////////////////////////////
-
- template<class T, bool is_in_temporary_kernel, class TList = NullType, class TokenOp = add_type, class Enable = void>
- struct extract_tokens
- {
- typedef TList Result;
- };
-
- template <class LHS, class OP, class RHS, bool is_temporary, bool is_in_temporary_kernel, class TList, class TokenOp>
- struct extract_tokens<compound_node<LHS, OP, RHS, is_temporary>,
- is_in_temporary_kernel,
- TList,
- TokenOp>
- {
- private:
- typedef compound_node<LHS,OP,RHS,is_temporary> T;
- typedef typename extract_tokens<LHS, is_in_temporary_kernel, TList, TokenOp>::Result LHS_Result;
- typedef typename extract_tokens<RHS, is_in_temporary_kernel, TList, OP>::Result RHS_Result;
- typedef typename typelist_utils::fuse<RHS_Result,LHS_Result>::Result ResultFalse;
- typedef typename typelist_utils::append<TList, std::pair<T,TokenOp> >::Result ResulTrue;
-
- public:
- typedef typename get_type_if<ResulTrue,ResultFalse,is_product_leaf<T>::value>::Result Result;
- };
-
- template <class TList,bool make_operator_inplace>
- struct tokenize_operators
- {
- private:
- typedef typename TList::Head Head;
- typedef typename get_type_if<typename make_inplace<typename Head::second_type>::Result,
- assign_type,
- make_operator_inplace>::Result NewOperator;
- typedef std::pair<typename Head::first_type, NewOperator> NewHead;
- typedef typename TList::Tail Tail;
- typedef typename tokenize_operators<Tail, true>::Result NewTail;
-
- public:
- typedef typelist<NewHead, NewTail> Result;
- };
-
- template <bool make_operator_inplace>
- struct tokenize_operators<NullType,make_operator_inplace>
- {
- typedef NullType Result;
- };
-
-
-
- template <class T, bool is_in_temporary_kernel>
- struct generate_tokens;
-
- template <class LHS,class OP, class RHS, bool is_temporary, bool is_in_temporary_kernel>
- struct generate_tokens<compound_node<LHS,OP,RHS,is_temporary>, is_in_temporary_kernel>
- {
- private:
- typedef typename tree_utils::remove_if<RHS, is_product_leaf>::Result NewTree;
- typedef std::pair<NewTree,OP> LinearToken;
- typedef typename extract_tokens<RHS, is_in_temporary_kernel>::Result Products;
- typedef typename tokenize_operators<Products,
- !is_null_type<NewTree>::value>::Result TokenizedProducts;
-
- public:
- typedef typelist<LinearToken,TokenizedProducts> Result;
- };
-
- }
-}
-#endif
diff --git a/viennacl/generator/traits/general_purpose_traits.hpp b/viennacl/generator/traits/general_purpose_traits.hpp
deleted file mode 100644
index d3387a2..0000000
--- a/viennacl/generator/traits/general_purpose_traits.hpp
+++ /dev/null
@@ -1,250 +0,0 @@
-#ifndef VIENNACL_GENERATOR_TRAITS_GENERAL_PURPOSE_TRAITS_HPP
-#define VIENNACL_GENERATOR_TRAITS_GENERAL_PURPOSE_TRAITS_HPP
-
-/* =========================================================================
- Copyright (c) 2010-2012, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp at iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/generator/traits/general_purpose_traits.hpp
- * @brief Provides a set of metafunctions for the identification of types
- *
- * Generator code contributed by Philippe Tillet
- */
-
-#include "viennacl/generator/operation_types.hpp"
-#include "viennacl/generator/forwards.h"
-#include "viennacl/generator/traits/result_of.hpp"
-#include "viennacl/generator/meta_tools/typelist.hpp"
-
-namespace viennacl
-{
- namespace generator
- {
- template <class T>
- struct is_scalar_expression_impl
- {
- enum { value = 0 };
- };
-
- template <class T>
- struct is_scalar_expression_impl<result_of::scalar_expression<T> >
- {
- enum { value = 1};
- };
-
- template <class T>
- struct is_scalar_expression
- {
- enum { value = is_scalar_expression_impl<typename result_of::expression_type<T>::Result >::value };
- };
-
- template <class T>
- struct is_temporary
- {
- enum { value = 0 } ;
- };
-
- template <class LHS, class OP, class RHS>
- struct is_temporary<compound_node<LHS,OP,RHS,true> >
- {
- enum {value = 1};
- };
-
- template <class REF>
- struct is_temporary<tmp_symbolic_vector<REF> >
- {
- enum { value = 1};
- };
-
- template <class T>
- struct is_temporary_kernel_parameter
- {
- enum { value = is_temporary<T>::value };
- };
-
- template <class T>
- struct is_temporary_kernel_parameter<inner_prod_impl_t<T> >
- {
- enum { value = 1 };
- };
-
-
- template <class T>
- struct is_regular_kernel_parameter
- {
- enum { value = 0 };
- };
-
- template <unsigned int ID,class SCALARTYPE, unsigned int ALIGNMENT>
- struct is_regular_kernel_parameter<symbolic_vector<ID,SCALARTYPE,ALIGNMENT> >
- {
- enum { value = 1 };
- };
-
- template <unsigned int ID,class SCALARTYPE, class F, unsigned int ALIGNMENT>
- struct is_regular_kernel_parameter<symbolic_matrix<ID,SCALARTYPE,F,ALIGNMENT> >
- {
- enum { value = 1 };
- };
-
- template <unsigned int ID, class SCALARTYPE>
- struct is_regular_kernel_parameter<cpu_symbolic_scalar<ID, SCALARTYPE> >
- {
- enum { value = 1 };
- };
-
- template <unsigned int ID, class SCALARTYPE>
- struct is_regular_kernel_parameter<gpu_symbolic_scalar<ID, SCALARTYPE> >
- {
- enum { value = 1 };
- };
-
-
-
- template <class T>
- struct is_pure_inner_product_leaf
- {
- enum { value = 0};
- };
-
- template <class LHS,class RHS, bool is_temporary>
- struct is_pure_inner_product_leaf<compound_node<LHS,inner_prod_type,RHS, is_temporary> >
- {
- enum { value = 1};
- };
-
- template <class T>
- struct is_inner_product_leaf
- {
- enum { value = is_pure_inner_product_leaf<T>::value };
- };
-
- template <class LHS,class RHS>
- struct is_inner_product_leaf<compound_node<LHS,scal_mul_type,RHS> >
- {
- enum { value = ( is_inner_product_leaf<LHS>::value && is_scalar_expression<RHS>::value && !is_inner_product_leaf<RHS>::value )
- || ( is_inner_product_leaf<RHS>::value && is_scalar_expression<LHS>::value &&!is_inner_product_leaf<LHS>::value )
- };
- };
-
- template <class LHS,class RHS>
- struct is_inner_product_leaf<compound_node<LHS,scal_div_type,RHS> >
- {
- enum { value = ( is_inner_product_leaf<LHS>::value && is_scalar_expression<RHS>::value && !is_inner_product_leaf<RHS>::value )
- || ( is_inner_product_leaf<RHS>::value && is_scalar_expression<LHS>::value &&!is_inner_product_leaf<LHS>::value )
- };
- };
-
- template <class T>
- struct is_pure_product_leaf
- {
- enum { value = 0};
- };
-
- template <class LHS,class RHS, bool is_temporary>
- struct is_pure_product_leaf<compound_node<LHS,prod_type,RHS, is_temporary> >
- {
- enum { value = 1};
- };
-
- template <class T>
- struct is_product_leaf
- {
- enum { value = is_pure_product_leaf<T>::value };
- };
-
- template <class LHS,class RHS>
- struct is_product_leaf<compound_node<LHS,scal_mul_type,RHS> >
- {
- enum { value = is_product_leaf<LHS>::value
- ||is_product_leaf<RHS>::value
- };
- };
-
- template <class T>
- struct is_null_type
- {
- enum { value = 0 };
- };
-
- template <>
- struct is_null_type<NullType>
- {
- enum { value = 1 };
- };
-
- template <class T>
- struct is_compound
- {
- enum { value = 0 } ;
- };
-
- template <class LHS, class OP, class RHS, bool is_temporary>
- struct is_compound<compound_node<LHS,OP,RHS,is_temporary> >
- {
- enum {value = 1};
- };
-
- template <class EXPR1, class EXPR2>
- struct is_same_expression_type_impl
- {
- enum { value = 0 };
- };
-
- template <class EXPR1, class DESCRIPTOR1, class EXPR2, class DESCRIPTOR2>
- struct is_same_expression_type_impl<result_of::vector_expression<EXPR1,DESCRIPTOR1>,
- result_of::vector_expression<EXPR2,DESCRIPTOR2> >
- {
- private:
- typedef result_of::vector_expression<EXPR1,DESCRIPTOR1> LHS;
- typedef result_of::vector_expression<EXPR2,DESCRIPTOR2> RHS;
- public:
- enum { value = LHS::Alignment == RHS::Alignment };
- };
-
- template <class EXPR1, class LHS_DESCRIPTOR1, class RHS_DESCRIPTOR1,
- class EXPR2, class LHS_DESCRIPTOR2, class RHS_DESCRIPTOR2>
- struct is_same_expression_type_impl<result_of::matrix_expression<EXPR1,LHS_DESCRIPTOR1,RHS_DESCRIPTOR1>,
- result_of::matrix_expression<EXPR2,LHS_DESCRIPTOR2,RHS_DESCRIPTOR2> >
- {
- private:
- typedef result_of::matrix_expression<EXPR1,LHS_DESCRIPTOR1,RHS_DESCRIPTOR1> LHS;
- typedef result_of::matrix_expression<EXPR2,LHS_DESCRIPTOR2,RHS_DESCRIPTOR2> RHS;
-
- public:
- enum { value = LHS::Alignment == RHS::Alignment };
- };
-
- template <class EXPR1, class EXPR2>
- struct is_same_expression_type_impl<result_of::scalar_expression<EXPR1>,
- result_of::scalar_expression<EXPR2> >
- {
- enum { value = 1 };
- };
-
- template<class EXPR1, class EXPR2>
- struct is_same_expression_type
- {
- enum { value = is_same_expression_type_impl<typename result_of::expression_type<EXPR1>::Result,
- typename result_of::expression_type<EXPR2>::Result>::value
- };
- };
-
- }
-}
-
-#endif
-
-
diff --git a/viennacl/generator/traits/result_of.hpp b/viennacl/generator/traits/result_of.hpp
deleted file mode 100644
index a4a7d59..0000000
--- a/viennacl/generator/traits/result_of.hpp
+++ /dev/null
@@ -1,591 +0,0 @@
-#ifndef VIENNACL_GENERATOR_TRAITS_RESULT_OF_HPP
-#define VIENNACL_GENERATOR_TRAITS_RESULT_OF_HPP
-
-/* =========================================================================
- Copyright (c) 2010-2012, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp at iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/generator/traits/result_of.hpp
- * @brief Provides a set of metafunctions for type deductions within the kernel generator framework.
- *
- * Generator code contributed by Philippe Tillet
- */
-
-#include <string>
-
-#include "viennacl/generator/traits/general_purpose_traits.hpp"
-#include "viennacl/generator/forwards.h"
-#include "viennacl/generator/meta_tools/utils.hpp"
-#include "viennacl/generator/elementwise_modifier.hpp"
-#include "viennacl/ocl/local_mem.hpp"
-#include "viennacl/ocl/handle.hpp"
-#include "viennacl/forwards.h"
-#include "CL/cl.h"
-
-namespace viennacl
-{
- namespace generator
- {
- namespace result_of
- {
-
- class runtime_wrapper
- {
- protected:
- bool is_temporary_;
- std::string name_;
- int arg_id_;
-
- public:
- runtime_wrapper(bool _is_temporary, std::string const & _name, int _arg_id)
- : is_temporary_(_is_temporary), name_(_name), arg_id_(_arg_id) {}
-
- bool is_temporary() const { return is_temporary_; }
- int arg_id() const { return arg_id_; }
- std::string name() const { return name_; }
-
- virtual void enqueue(unsigned int arg_pos,
- viennacl::ocl::kernel & k,
- std::map<unsigned int, viennacl::any> & runtime_args,
- std::map<std::string, viennacl::ocl::handle<cl_mem> > & temporaries) = 0;
- };
-
- class shared_memory_wrapper : public runtime_wrapper
- {
- public:
- shared_memory_wrapper() : runtime_wrapper(true, "shared_memory_ptr", -1 ){ }
-
- void enqueue(unsigned int arg_pos,
- viennacl::ocl::kernel & k,
- std::map<unsigned int, viennacl::any> & runtime_args,
- std::map<std::string, viennacl::ocl::handle<cl_mem> > & temporaries)
- {
- unsigned int lmem_size = k.local_work_size();
- k.arg(arg_pos, viennacl::ocl::local_mem(lmem_size*sizeof(float)));
- }
-
- };
-
- template <class T, class SIZE_T>
- struct vector_runtime_wrapper : public runtime_wrapper
- {
- private:
- unsigned int size_id_;
-
- template<typename ScalarType, unsigned int Alignment>
- typename SIZE_T::size_type size(viennacl::vector<ScalarType,Alignment> * size_arg) { return size_arg->size(); }
-
- template<typename ScalarType, class F, unsigned int Alignment>
- typename SIZE_T::size_type size(viennacl::matrix<ScalarType,F,Alignment> * size_arg) { return size_arg->size2(); }
-
- template<typename ScalarType, unsigned int Alignment>
- typename SIZE_T::size_type internal_size(viennacl::vector<ScalarType,Alignment> * size_arg) { return size_arg->internal_size(); }
-
- template<typename ScalarType, class F, unsigned int Alignment>
- typename SIZE_T::size_type internal_size(viennacl::matrix<ScalarType,F,Alignment> * size_arg) { return size_arg->internal_size2(); }
-
- public:
- vector_runtime_wrapper(bool _is_temporary, std::string const & _name, int _arg_id, unsigned int _size_id)
- : runtime_wrapper(_is_temporary,_name,_arg_id),size_id_(_size_id) {}
-
- void enqueue(unsigned int arg_pos,
- viennacl::ocl::kernel & k,
- std::map<unsigned int, viennacl::any> & runtime_args,
- std::map<std::string,
- viennacl::ocl::handle<cl_mem> > & temporaries)
- {
- SIZE_T * size_arg = viennacl::any_cast<SIZE_T * >(runtime_args[size_id_]);
- viennacl::ocl::handle<cl_mem> handle = NULL;
- if(is_temporary_)
- {
- if(temporaries.find(name_)==temporaries.end())
- {
- temporaries.insert(
- std::make_pair(name_,
- viennacl::ocl::handle<cl_mem>(
- viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
- size_arg->internal_size()*sizeof(typename T::value_type))
- )
- )
- );
- }
- handle = temporaries[name_];
- }
- else
- {
- T * current_arg = viennacl::any_cast<T * >(runtime_args[arg_id_]);
- handle = current_arg->handle();
- }
- k.arg(arg_pos, handle );
- k.arg(arg_pos+1,cl_uint(size(size_arg)));
- k.arg(arg_pos+2,cl_uint(internal_size(size_arg)));
- }
- };
-
- template <class T, class SIZE_DESCRIPTOR>
- struct vector_expression : public runtime_wrapper
- {
- typedef T type;
- typedef typename SIZE_DESCRIPTOR::ScalarType ScalarType;
- static const unsigned int Alignment = SIZE_DESCRIPTOR::Alignment;
-
- static runtime_wrapper * runtime_descriptor()
- {
- return new vector_runtime_wrapper<viennacl::vector<ScalarType,Alignment>,
- typename SIZE_DESCRIPTOR::runtime_type>(viennacl::generator::is_temporary<T>::value,
- T::name(),
- T::id,SIZE_DESCRIPTOR::id);
- }
-
- static const std::string size_expression()
- {
- return SIZE_DESCRIPTOR::size2_name();
- }
-
- static const std::string internal_size_expression()
- {
- return SIZE_DESCRIPTOR::internal_size2_name() + "/" + to_string(Alignment);
- }
- };
-
- template <class T, class SIZE1_T, class SIZE2_T>
- struct matrix_runtime_wrapper : public runtime_wrapper
- {
- private:
- unsigned int size1_id_;
- unsigned int size2_id_;
- public:
- matrix_runtime_wrapper(bool _is_temporary,
- std::string const & _name,
- int _arg_id,
- unsigned int _size1_id,
- unsigned int _size2_id)
- : runtime_wrapper(_is_temporary,_name,_arg_id), size1_id_(_size1_id), size2_id_(_size2_id) {}
-
- unsigned int n_elements(){ return size1_id_*size2_id_; }
-
- void enqueue(unsigned int arg_pos,
- viennacl::ocl::kernel & k,
- std::map<unsigned int, viennacl::any> & runtime_args,
- std::map<std::string,
- viennacl::ocl::handle<cl_mem> > & temporaries)
- {
- if (is_temporary_) {}
-
- T * current_arg = any_cast<T * >(runtime_args[arg_id_]);
- SIZE1_T * size1_arg = any_cast<SIZE1_T * >(runtime_args[size1_id_]);
- SIZE2_T * size2_arg = any_cast<SIZE2_T * >(runtime_args[size2_id_]);
- k.arg(arg_pos, current_arg->handle());
- k.arg(arg_pos+1,cl_uint(size1_arg->size1()));
- k.arg(arg_pos+2,cl_uint(size2_arg->size2()));
- k.arg(arg_pos+3,cl_uint(size1_arg->internal_size1()));
- k.arg(arg_pos+4,cl_uint(size2_arg->internal_size2()));
- }
- };
-
- template <class T, class SIZE1_DESCRIPTOR, class SIZE2_DESCRIPTOR>
- struct matrix_expression
- {
- typedef typename SIZE1_DESCRIPTOR::ScalarType ScalarType;
- typedef typename SIZE1_DESCRIPTOR::Layout Layout;
- static const unsigned int Alignment = SIZE1_DESCRIPTOR::Alignment;
-
- static runtime_wrapper * runtime_descriptor()
- {
- return new matrix_runtime_wrapper<viennacl::matrix<ScalarType,Layout,Alignment>,
- typename SIZE1_DESCRIPTOR::runtime_type,
- typename SIZE2_DESCRIPTOR::runtime_type>(is_temporary<T>::value,T::name(),
- T::id,SIZE1_DESCRIPTOR::id,
- SIZE2_DESCRIPTOR::id);
- }
-
- static const std::string size1_expression()
- {
- return SIZE1_DESCRIPTOR::size1_name();
- }
-
- static const std::string size2_expression()
- {
- return SIZE2_DESCRIPTOR::size2_name();
- }
-
- static const std::string internal_size1_expression()
- {
- return SIZE1_DESCRIPTOR::internal_size1_name() + "/" + to_string(Alignment);
- }
-
- static const std::string internal_size2_expression()
- {
- return SIZE2_DESCRIPTOR::internal_size2_name() + "/" + to_string(Alignment);
- }
-
- typedef T type;
- };
-
- template <class T>
- struct scalar_size_descriptor
- {
- static unsigned int size(viennacl::ocl::kernel & k) { return 1; }
- };
-
- template <class LHS, class RHS, bool is_temporary>
- struct scalar_size_descriptor<compound_node<LHS,inner_prod_type,RHS,is_temporary> >
- {
- static unsigned int size(viennacl::ocl::kernel & k)
- {
- return k.global_work_size(0)/k.local_work_size(0);
- }
- };
-
- template <class T>
- struct scalar_runtime_wrapper: public runtime_wrapper
- {
- typedef typename T::ScalarType ScalarType;
-
- scalar_runtime_wrapper(bool _is_temporary, std::string const & _name, int _arg_id) : runtime_wrapper(_is_temporary,_name,_arg_id){}
-
- void enqueue(unsigned int arg_pos,
- viennacl::ocl::kernel & k,
- std::map<unsigned int,
- viennacl::any> & runtime_args,
- std::map<std::string,
- viennacl::ocl::handle<cl_mem> > & temporaries)
- {
- if(is_temporary_)
- {
- if(temporaries.find(name_)==temporaries.end())
- {
- temporaries.insert(
- std::make_pair(name_,
- viennacl::ocl::handle<cl_mem>(
- viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
- scalar_size_descriptor<T>::size(k)*sizeof(ScalarType))
- )
- )
- );
- }
- k.arg(arg_pos, temporaries[name_]);
- }
-
- if(arg_id_==-2)
- k.arg(arg_pos, temporaries[name_]);
- else
- {
- viennacl::scalar<ScalarType>* current_arg = any_cast<viennacl::scalar<ScalarType> * >(runtime_args[arg_id_]);
- k.arg(arg_pos, current_arg->handle());
- }
-
- }
- };
-
- template <unsigned int ID, class ScalarType>
- struct scalar_runtime_wrapper<viennacl::generator::cpu_symbolic_scalar<ID, ScalarType> >: public runtime_wrapper
- {
- scalar_runtime_wrapper(bool _is_temporary, std::string const & _name, int _arg_id) : runtime_wrapper(_is_temporary,_name,_arg_id){ }
-
- void enqueue(unsigned int arg_pos,
- viennacl::ocl::kernel & k,
- std::map<unsigned int, viennacl::any> & runtime_args,
- std::map<std::string, viennacl::ocl::handle<cl_mem> > & temporaries)
- {
- ScalarType* current_arg = any_cast<ScalarType * >(runtime_args[arg_id_]);
- k.arg(arg_pos, cl_float(*current_arg));
- }
- };
-
- template <class T>
- struct scalar_expression
- {
- typedef typename T::ScalarType ScalarType;
-
- static runtime_wrapper * runtime_descriptor()
- {
- return new scalar_runtime_wrapper<T>(is_temporary<T>::value,T::name(),T::id);
- }
- };
-
- /*
- * Compound Nodes - General case
- */
- template <class T>
- struct expression_type
- {
- typedef NullType Result;
- };
-
- template <class LHS, class OP, class RHS, bool is_temporary>
- struct expression_type<compound_node<LHS,OP,RHS,is_temporary> >
- {
- private:
- typedef typename expression_type<LHS>::Result LHS_Result;
- typedef typename expression_type<RHS>::Result RHS_Result;
-
- public:
- typedef typename expression_type<compound_node<LHS_Result, OP, RHS_Result,is_temporary> >::Result Result;
- };
-
- /*
- * Compound Nodes - usual operators
- */
- template <class LHS, class LHS_SIZE_DESCRIPTOR ,class OP ,class RHS, class RHS_SIZE_DESCRIPTOR ,bool is_temporary>
- struct expression_type<compound_node<vector_expression<LHS,LHS_SIZE_DESCRIPTOR>,
- OP,
- vector_expression<RHS,RHS_SIZE_DESCRIPTOR>,
- is_temporary>
- >
- {
- private:
- typedef compound_node<LHS ,OP, RHS, is_temporary> T;
-
- public:
- typedef vector_expression<T, LHS_SIZE_DESCRIPTOR> Result;
- };
-
-
- template <class LHS, class LHS_SIZE1_DESCRIPTOR, class LHS_SIZE2_DESCRIPTOR,
- class OP,
- class RHS, class RHS_SIZE1_DESCRIPTOR, class RHS_SIZE2_DESCRIPTOR,
- bool is_temporary>
- struct expression_type<compound_node<matrix_expression<LHS, LHS_SIZE1_DESCRIPTOR, LHS_SIZE2_DESCRIPTOR>,
- OP,
- matrix_expression<RHS, RHS_SIZE1_DESCRIPTOR, RHS_SIZE2_DESCRIPTOR>,
- is_temporary>
- >
- {
- private:
- typedef compound_node<LHS ,OP, RHS, is_temporary> T;
-
- public:
- typedef matrix_expression<T, LHS_SIZE1_DESCRIPTOR, LHS_SIZE2_DESCRIPTOR> Result;
- };
-
- template <class LHS, class OP, class RHS, bool is_temporary>
- struct expression_type<compound_node<scalar_expression<LHS>,
- OP,
- scalar_expression<RHS>,
- is_temporary>
- >
- {
- private:
- typedef compound_node<LHS ,OP, RHS, is_temporary> T;
-
- public:
- typedef scalar_expression<T> Result;
- };
-
- /*
- * Scalar Operators
- */
- template <class LHS, class LHS_SIZE_DESCRIPTOR,
- class OP,
- class RHS,
- bool is_temporary>
- struct expression_type<compound_node<vector_expression<LHS,LHS_SIZE_DESCRIPTOR>,
- OP,
- scalar_expression<RHS>,
- is_temporary> >
- {
- private:
- typedef compound_node<LHS ,OP, RHS, is_temporary> T;
- public:
- typedef vector_expression<T, LHS_SIZE_DESCRIPTOR> Result;
- };
-
- template <class LHS,
- class OP,
- class RHS, class RHS_SIZE_DESCRIPTOR,
- bool is_temporary>
- struct expression_type<compound_node<scalar_expression<LHS>,
- OP,
- vector_expression<RHS,RHS_SIZE_DESCRIPTOR>,
- is_temporary>
- >
- {
- private:
- typedef compound_node<LHS ,OP, RHS, is_temporary> T;
- public:
- typedef vector_expression<T, RHS_SIZE_DESCRIPTOR> Result;
- };
-
-
- template <class LHS, class LHS_SIZE1_DESCRIPTOR, class LHS_SIZE2_DESCRIPTOR,
- class OP,
- class RHS, bool is_temporary>
- struct expression_type<compound_node<matrix_expression<LHS,LHS_SIZE1_DESCRIPTOR,LHS_SIZE2_DESCRIPTOR>,
- OP,
- scalar_expression<RHS>,
- is_temporary>
- >
- {
- private:
- typedef compound_node<LHS ,OP, RHS, is_temporary> T;
- public:
- typedef matrix_expression<T, LHS_SIZE1_DESCRIPTOR, LHS_SIZE2_DESCRIPTOR> Result;
- };
-
- template <class LHS,
- class OP,
- class RHS, class RHS_SIZE1_DESCRIPTOR, class RHS_SIZE2_DESCRIPTOR,
- bool is_temporary>
- struct expression_type<compound_node<scalar_expression<LHS>,
- OP,
- matrix_expression<RHS,RHS_SIZE1_DESCRIPTOR, RHS_SIZE2_DESCRIPTOR>,
- is_temporary>
- >
- {
- private:
- typedef compound_node<LHS ,OP, RHS, is_temporary> T;
- public:
- typedef matrix_expression<T, RHS_SIZE1_DESCRIPTOR, RHS_SIZE2_DESCRIPTOR> Result;
- };
-
-
- /*
- * Compound Nodes - Non Trivial Operators
- */
-
- //Matrix-Vector product
- template <class LHS, class LHS_SIZE1_DESCRIPTOR, class LHS_SIZE2_DESCRIPTOR,
- class RHS, class RHS_SIZE_DESCRIPTOR,
- bool is_temporary>
- struct expression_type<compound_node<matrix_expression<LHS, LHS_SIZE1_DESCRIPTOR, LHS_SIZE2_DESCRIPTOR>,
- prod_type,
- vector_expression<RHS,RHS_SIZE_DESCRIPTOR>,
- is_temporary>
- >
- {
- typedef vector_expression<compound_node<LHS,prod_type,RHS,is_temporary>, LHS_SIZE1_DESCRIPTOR > Result;
- };
-
- template <class T>
- struct expression_type<inner_prod_impl_t<T> >
- {
- typedef scalar_expression<T> Result;
- };
-
- //Matrix-Matrix product
- template <class LHS, class LHS_SIZE1_DESCRIPTOR, class LHS_SIZE2_DESCRIPTOR,
- class RHS, class RHS_SIZE1_DESCRIPTOR, class RHS_SIZE2_DESCRIPTOR,
- bool is_temporary>
- struct expression_type<compound_node<matrix_expression<LHS, LHS_SIZE1_DESCRIPTOR, LHS_SIZE2_DESCRIPTOR>,
- prod_type,
- matrix_expression<RHS,RHS_SIZE1_DESCRIPTOR,RHS_SIZE2_DESCRIPTOR>,
- is_temporary>
- >
- {
- typedef matrix_expression<compound_node<LHS,prod_type,RHS,is_temporary>, LHS_SIZE1_DESCRIPTOR, RHS_SIZE2_DESCRIPTOR > Result;
- };
-
- //Inner product
- template <class LHS, class LHS_SIZE_DESCRIPTOR,
- class RHS, class RHS_SIZE_DESCRIPTOR,
- bool is_temporary>
- struct expression_type< compound_node<vector_expression<LHS,LHS_SIZE_DESCRIPTOR>,
- inner_prod_type,
- vector_expression<RHS,RHS_SIZE_DESCRIPTOR>,
- is_temporary>
- >
- {
- typedef scalar_expression<compound_node<LHS,inner_prod_type,RHS,is_temporary> > Result;
- };
-
-
- /*
- * Elementwise Modifiers
- */
- template <class T, std::string (*U)()>
- struct expression_type< elementwise_modifier_impl<T,U> >
- {
- typedef typename expression_type<T>::Result Result;
- };
-
- template <class T, class SIZE_DESCRIPTOR>
- struct expression_type< vector_expression<T,SIZE_DESCRIPTOR> >
- {
- typedef typename expression_type<T>::Result Result;
- };
-
- template <class T, class SIZE1_DESCRIPTOR, class SIZE2_DESCRIPTOR>
- struct expression_type< matrix_expression<T,SIZE1_DESCRIPTOR,SIZE2_DESCRIPTOR> >
- {
- typedef typename expression_type<T>::Result Result;
- };
-
- template <class T>
- struct expression_type< scalar_expression<T> >
- {
- typedef typename expression_type<T>::Result Result;
- };
-
- /*
- * Symbolic Vectors
- */
-
- template <unsigned int ID,typename SCALARTYPE, unsigned int ALIGNMENT>
- struct expression_type< symbolic_vector<ID,SCALARTYPE,ALIGNMENT> >
- {
- typedef vector_expression<symbolic_vector<ID,SCALARTYPE,ALIGNMENT>,
- symbolic_vector<ID,SCALARTYPE,ALIGNMENT> > Result;
- };
-
- template <class Ref>
- struct expression_type<tmp_symbolic_vector<Ref> >
- {
- typedef vector_expression<tmp_symbolic_vector<Ref>, Ref> Result;
- };
-
- /*
- * Symbolic Matrices
- */
-
- template <unsigned int ID,typename SCALARTYPE, class F, unsigned int ALIGNMENT>
- struct expression_type<symbolic_matrix<ID,SCALARTYPE,F,ALIGNMENT> >
- {
- private:
- typedef symbolic_matrix<ID,SCALARTYPE,F,ALIGNMENT> T;
- public:
- typedef matrix_expression<T, T, T> Result;
- };
-
- template <class Ref>
- struct expression_type<tmp_symbolic_matrix<Ref> >
- {
- typedef matrix_expression<tmp_symbolic_matrix<Ref>, Ref, Ref > Result;
- };
-
- /*
- * Symbolic Scalars
- */
-
- template <unsigned int ID, typename SCALARTYPE>
- struct expression_type<cpu_symbolic_scalar<ID, SCALARTYPE> >
- {
- typedef scalar_expression<cpu_symbolic_scalar<ID, SCALARTYPE> > Result;
- };
-
- template <unsigned int ID, typename SCALARTYPE>
- struct expression_type<gpu_symbolic_scalar<ID, SCALARTYPE> >
- {
- typedef scalar_expression< gpu_symbolic_scalar<ID, SCALARTYPE> > Result;
- };
-
-
- }
- }
-}
-
-#endif
-
-
diff --git a/viennacl/generator/tree_operations.hpp b/viennacl/generator/tree_operations.hpp
deleted file mode 100644
index 53ec317..0000000
--- a/viennacl/generator/tree_operations.hpp
+++ /dev/null
@@ -1,487 +0,0 @@
-#ifndef VIENNACL_GENERATOR_TREE_OPERATIONS_HPP
-#define VIENNACL_GENERATOR_TREE_OPERATIONS_HPP
-
-/* =========================================================================
- Copyright (c) 2010-2012, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp at iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/generator/tree_operations.hpp
- * @brief Functors for modifying the expression tree.
- *
- * Generator code contributed by Philippe Tillet
- */
-
-
-#include "viennacl/generator/elementwise_modifier.hpp"
-#include "viennacl/generator/traits/general_purpose_traits.hpp"
-
-namespace viennacl
-{
- namespace generator
- {
- namespace tree_utils
- {
-
- /*
- * Count if
- */
-
- template <class T, template<class> class Pred>
- struct count_if
- {
- enum { value = Pred<T>::value };
- };
-
- template <class T, std::string (*U)(), template<class> class Pred>
- struct count_if<elementwise_modifier_impl<T,U>, Pred>
- {
- enum { value = Pred<T>::value + count_if<T, Pred>::value };
- };
-
- template<class T, template<class> class Pred>
- struct count_if<inner_prod_impl_t<T>, Pred>
- {
- enum { value = Pred<inner_prod_impl_t<T> >::value + count_if<T, Pred>::value };
- };
-
-
- template<class LHS, class RHS, class OP, bool is_temporary, template<class> class Pred>
- struct count_if<compound_node<LHS,OP,RHS,is_temporary>,Pred>
- {
- private:
- typedef compound_node<LHS,OP,RHS,is_temporary> T;
-
- public:
- enum { value = Pred<T>::value
- + count_if<LHS, Pred>::value
- + count_if<RHS, Pred>::value
- };
- };
-
-
- /*
- * Count if type
- */
-
- template<class T, class Searched>
- struct count_if_type
- {
- enum { value = 0 };
- };
-
- template<class T>
- struct count_if_type<T,T>
- {
- enum { value = 1 };
- };
-
- template<class T, std::string (*U)(), class Searched>
- struct count_if_type<elementwise_modifier_impl<T,U>, Searched>
- {
- enum { value = count_if_type<T, Searched>::value };
- };
-
- template <class T, std::string (*U)()>
- struct count_if_type<elementwise_modifier_impl<T,U>, elementwise_modifier_impl<T,U> >
- {
- enum { value = 1 + count_if_type<T, elementwise_modifier_impl<T,U> >::value };
- };
-
- template <class LHS, class OP, class RHS, bool is_temporary>
- struct count_if_type<compound_node<LHS, OP, RHS, is_temporary>,
- compound_node<LHS, OP, RHS, is_temporary> >
- {
- private:
- typedef compound_node<LHS, OP, RHS, is_temporary> T;
- public:
- enum { value = 1 + count_if_type<LHS, T>::value
- + count_if_type<RHS, T>::value
- };
- };
-
- template <class LHS, class OP, class RHS, bool is_temporary, class Searched>
- struct count_if_type< compound_node<LHS,OP,RHS,is_temporary>, Searched>
- {
- enum { value = count_if_type<LHS, Searched>::value
- + count_if_type<RHS, Searched>::value
- };
- };
-
-
- /*
- * Expand
- */
-
- template <class LHS, class OP, bool is_temporary, class RHS_LHS, class RHS_OP, class RHS_RHS, bool RHS_is_temporary>
- struct expand_right
- {
- typedef compound_node< compound_node<LHS, OP, RHS_LHS, RHS_is_temporary>,
- RHS_OP,
- compound_node<LHS, OP, RHS_RHS, RHS_is_temporary>,
- is_temporary> Result;
- };
-
- template <class LHS_LHS, class LHS_OP, class LHS_RHS, bool LHS_is_temporary, class OP, class RHS, bool is_temporary>
- struct expand_left
- {
- typedef compound_node< compound_node<LHS_LHS, OP, RHS, LHS_is_temporary>,
- LHS_OP,
- compound_node<LHS_RHS, OP, RHS, LHS_is_temporary>,
- is_temporary> Result;
- };
-
- template <class T>
- struct expand
- {
- typedef T Result;
- };
-
- template <class T, std::string (*U)()>
- struct expand< elementwise_modifier_impl<T,U> >
- {
- private:
- typedef typename expand<T>::Result SUB_Result;
- public:
- typedef elementwise_modifier_impl<SUB_Result,U> Result;
- };
-
- template<class T>
- struct expand<inner_prod_impl_t<T> >
- {
- private:
- typedef typename expand<T>::Result SUB_Result;
- public:
- typedef inner_prod_impl_t<SUB_Result> Result;
- };
-
-
- template<class LHS,class OP,class RHS,bool is_temporary>
- struct expand< compound_node<LHS,OP,RHS,is_temporary> >
- {
- typedef compound_node<typename expand<LHS>::Result, OP, typename expand<RHS>::Result, is_temporary> Result;
- };
-
- #define make_right_expandable(__OPERATOR1__ , __OPERATOR2__) \
- template<class LHS, class RHS_LHS, class RHS_RHS, bool RHS_is_temporary, bool is_temporary>\
- struct expand< compound_node<LHS, __OPERATOR1__, compound_node<RHS_LHS, __OPERATOR2__, RHS_RHS, RHS_is_temporary>, is_temporary> >\
- {\
- typedef typename expand_right<typename expand<LHS>::Result\
- , __OPERATOR1__\
- , is_temporary\
- , typename expand<RHS_LHS>::Result\
- , __OPERATOR2__\
- , typename expand<RHS_RHS>::Result\
- , RHS_is_temporary>::Result Result;\
- }
-
- #define make_left_expandable(__OPERATOR1__ , __OPERATOR2__) \
- template<class LHS_LHS, class LHS_RHS, bool LHS_is_temporary, class RHS, bool is_temporary>\
- struct expand< compound_node< compound_node<LHS_LHS, __OPERATOR2__ , LHS_RHS , LHS_is_temporary>\
- , __OPERATOR1__\
- , RHS\
- , is_temporary> >\
- {\
- typedef typename expand_left< typename expand<LHS_LHS>::Result\
- , __OPERATOR2__\
- , typename expand<LHS_RHS>::Result\
- , LHS_is_temporary\
- , __OPERATOR1__\
- , typename expand<RHS>::Result\
- , is_temporary\
- > ::Result Result;\
- }
-
- make_right_expandable ( scal_mul_type,add_type );
- make_right_expandable ( scal_mul_type,sub_type );
- make_left_expandable ( scal_mul_type,add_type );
- make_left_expandable ( scal_mul_type,sub_type );
-
-
- #undef make_left_expandable
- #undef make_right_expandable
-
- ////////////////////////////
- // REGISTER TEMPORARIES //
- ///////////////////////////
-
- template <class T>
- struct make_temporary;
-
- template <unsigned int ID, class SCALARTYPE, unsigned int ALIGNMENT>
- struct make_temporary<symbolic_vector<ID,SCALARTYPE,ALIGNMENT> >
- {
- typedef tmp_symbolic_vector< symbolic_vector<ID,SCALARTYPE,ALIGNMENT> > Result;
- };
-
- template <unsigned int ID,typename SCALARTYPE, class F, unsigned int ALIGNMENT>
- struct make_temporary<symbolic_matrix<ID,SCALARTYPE,F,ALIGNMENT> > {
- typedef tmp_symbolic_matrix< symbolic_matrix<ID,SCALARTYPE,F,ALIGNMENT> > Result;
- };
-
- template <class T, bool only_first_order, class Assigned = void, bool is_nested = false>
- struct register_temporaries
- {
- typedef T Result;
- };
-
- template <class T, bool only_first_order>
- struct register_temporaries<T, only_first_order, T, true>
- {
- typedef typename make_temporary<T>::Result Result;
- };
-
- template <class T, std::string (*U)(), bool only_first_order, class Assigned, bool is_nested>
- struct register_temporaries<elementwise_modifier_impl<T,U>, only_first_order, Assigned, is_nested>
- {
- private:
- typedef typename register_temporaries<T, only_first_order, Assigned, is_nested>::Result SUB_Result;
- public:
- typedef elementwise_modifier_impl<SUB_Result,U> Result;
- };
-
-
- /** @brief register_temporaries for a compound_node.
- *
- * A node counts as a product leaf if is_pure_product_leaf or is_pure_inner_product_leaf holds for it.
- * Both operands are transformed with is_nested switched on once the node itself is nested or a product leaf.
- * The rebuilt node carries the temporary flag only if it is a product leaf that was already temporary or is nested.
- * If the node is a nested product leaf and only_first_order is set, the recursion result is discarded and the
- * original node is returned with its temporary flag forced to true.
- */
- template <class LHS, class OP, class RHS, bool is_temporary, bool only_first_order, class Assigned, bool is_nested>
- struct register_temporaries<compound_node<LHS,OP,RHS,is_temporary>, only_first_order, Assigned, is_nested>
- {
-   private:
-     typedef compound_node<LHS,OP,RHS,is_temporary> Node;
-
-     static const bool is_product_leaf = is_pure_product_leaf<Node>::value || is_pure_inner_product_leaf<Node>::value;
-     static const bool operands_nested = is_nested || is_product_leaf;
-     static const bool stop_at_node    = is_product_leaf && only_first_order && is_nested;
-
-     typedef typename register_temporaries<LHS, only_first_order, Assigned, operands_nested>::Result NewLHS;
-     typedef typename register_temporaries<RHS, only_first_order, Assigned, operands_nested>::Result NewRHS;
-
-     typedef compound_node<NewLHS, OP, NewRHS, is_product_leaf && ( is_temporary || is_nested ) > DescendedNode;
-     typedef compound_node<LHS, OP, RHS, true> FlaggedNode;
-   public:
-     typedef typename get_type_if<FlaggedNode, DescendedNode, stop_at_node>::Result Result;
- };
-
-
- ////////////////////////////////
- //////// EXTRACTIF ////////
- ///////////////////////////////
-
-
- /** @brief Generic case of extract_if: if Pred<T>::value holds, Result is typelist<T, TList>; otherwise NullType.
- *
- * @tparam T     The type under inspection
- * @tparam Pred  Unary predicate template exposing ::value
- * @tparam Comp  Binary comparison template; not used in this generic case (defaults to typelist_utils::true_comp)
- * @tparam TList Typelist that T is prepended to on a match (defaults to NullType)
- */
- template <class T,
-           template<class> class Pred,
-           template<class, class> class Comp = typelist_utils::true_comp,
-           class TList = NullType>
- struct extract_if
- {
-   typedef typename get_type_if< typelist<T,TList>, NullType, Pred<T>::value >::Result Result;
- };
-
- /** @brief extract_if for an element-wise modifier: extracts from the wrapped expression T and fuses
- *         the outcome into TList (the fuse is invoked without passing Comp).
- */
- template <class T,
-           std::string (*U)(),
-           template<class> class Pred,
-           template<class,class> class Comp,
-           class TList>
- struct extract_if<elementwise_modifier_impl<T,U>, Pred, Comp, TList>
- {
-   typedef typename typelist_utils::fuse< TList,
-                                          typename extract_if<T, Pred, Comp, TList>::Result >::Result Result;
- };
-
- /** @brief extract_if for an inner product node.
- *
- * Extraction runs on both operands (T::LHS and T::RHS) and the outcomes are fused into TList one after the other.
- * If Pred holds for the inner product node itself, the node is additionally appended to that fused list.
- */
- template <class T,
-           template<class> class Pred,
-           template<class,class> class Comp,
-           class TList>
- struct extract_if<inner_prod_impl_t<T>, Pred, Comp, TList >
- {
-   private:
-     typedef inner_prod_impl_t<T> Node;
-
-     typedef typename extract_if<typename T::LHS, Pred, Comp, TList>::Result FromLHS;
-     typedef typename extract_if<typename T::RHS, Pred, Comp, TList>::Result FromRHS;
-
-     typedef typename typelist_utils::fuse<TList, FromLHS>::Result       WithLHS;
-     typedef typename typelist_utils::fuse<WithLHS, FromRHS>::Result     OperandsOnly;
-     typedef typename typelist_utils::append<OperandsOnly, Node>::Result OperandsAndNode;
-
-   public:
-     typedef typename get_type_if<OperandsAndNode, OperandsOnly, Pred<Node>::value>::Result Result;
- };
-
- /** @brief extract_if for a compound_node.
- *
- * If Pred holds for the node itself, Result is typelist<node, TList> and the operands do not contribute.
- * Otherwise the extraction outcomes of LHS and RHS are fused into TList (in that order) using Comp.
- */
- template <class LHS, class OP, class RHS, bool is_temporary,
-           template<class> class Pred,
-           template<class,class> class Comp,
-           class TList>
- struct extract_if< compound_node<LHS, OP, RHS, is_temporary>, Pred, Comp, TList>
- {
-   private:
-     typedef compound_node<LHS,OP,RHS,is_temporary> Node;
-
-     typedef typename extract_if<LHS,Pred,Comp,TList>::Result FromLHS;
-     typedef typename extract_if<RHS,Pred,Comp,TList>::Result FromRHS;
-
-     typedef typename typelist_utils::fuse<TList, FromLHS, Comp>::Result   WithLHS;
-     typedef typename typelist_utils::fuse<WithLHS, FromRHS, Comp>::Result FromOperands;
-     typedef typelist<Node, TList> NodeItself;
-   public:
-     typedef typename get_type_if<NodeItself, FromOperands, Pred<Node>::value>::Result Result;
- };
-
-
- ///////////////////////////////
- //////// FLIP_TREE ///////////
- ///////////////////////////////
-
- /** @brief Yields the flip flag to use for the right operand of an operator OP; for a generic OP the flag is passed on unchanged. */
- template <class OP, bool flip>
- struct invert_flip
- {
- enum { value = flip };
- };
-
- /** @brief For sub_type the flag is negated. */
- template <bool flip>
- struct invert_flip<sub_type, flip>
- {
- enum { value = !flip };
- };
-
- /** @brief Maps an operator tag to its flipped counterpart; the generic case returns OP unchanged. */
- template <class OP, bool flip>
- struct flip_operator
- {
- typedef OP Result;
- };
-
- /** @brief With flip == true, sub_type becomes add_type. */
- template <>
- struct flip_operator<sub_type, true>
- {
- typedef add_type Result;
- };
-
- /** @brief With flip == true, add_type becomes sub_type. */
- template <>
- struct flip_operator<add_type, true>
- {
- typedef sub_type Result;
- };
-
- /** @brief Generic case of flip_tree: any type without a dedicated specialization is returned unchanged. flip defaults to false. */
- template <class T, bool flip = false>
- struct flip_tree
- {
- typedef T Result;
- };
-
- /** @brief flip_tree for an element-wise modifier: flips the wrapped expression T with the same flag
- *         and re-wraps the outcome in the same modifier U.
- */
- template <class T, std::string (*U)(), bool flip>
- struct flip_tree <elementwise_modifier_impl<T,U>, flip>
- {
-   typedef elementwise_modifier_impl<typename flip_tree<T, flip>::Result, U> Result;
- };
-
- /** @brief flip_tree for a compound_node.
- *
- * The left operand is processed with the incoming flag, the right operand with invert_flip<OP, flip>::value,
- * and the node's operator is replaced by flip_operator<OP, flip>::Result. The temporary flag is kept.
- */
- template <class LHS, class OP, class RHS, bool is_temporary, bool flip>
- struct flip_tree< compound_node<LHS, OP, RHS, is_temporary>, flip>
- {
-   private:
-     static const bool flip_rhs = invert_flip<OP, flip>::value;
-
-     typedef typename flip_tree<LHS, flip>::Result     FlippedLHS;
-     typedef typename flip_tree<RHS, flip_rhs>::Result FlippedRHS;
-     typedef typename flip_operator<OP, flip>::Result  FlippedOP;
-
-   public:
-     typedef compound_node<FlippedLHS, FlippedOP, FlippedRHS, is_temporary> Result;
- };
-
- ////////////////////////////////
- //////// REMOVE_IF ////////////
- ///////////////////////////////
-
- /** @brief Helper used by compound_to_simple for nodes whose left operand is NullType; for a generic operator Result is just RHS. */
- template <class OP, class RHS>
- struct handle_unary_minus
- {
- typedef RHS Result;
- };
-
- /** @brief For sub_type the node compound_node<NullType, sub_type, RHS> is kept as Result instead of the bare RHS. */
- template <class RHS>
- struct handle_unary_minus<sub_type, RHS>
- {
- typedef compound_node<NullType,sub_type,RHS> Result;
- };
-
- /** @brief Simplifies compound nodes that have a NullType operand; any other type is returned unchanged. */
- template <class T>
- struct compound_to_simple
- {
- typedef T Result;
- };
-
- /** @brief A node whose right operand is NullType reduces to its left operand. */
- template <class LHS, class OP>
- struct compound_to_simple<compound_node<LHS, OP, NullType> >
- {
- typedef LHS Result;
- };
-
- /** @brief A node whose left operand is NullType reduces to handle_unary_minus<OP,RHS>::Result. */
- template <class OP, class RHS>
- struct compound_to_simple<compound_node<NullType, OP, RHS> >
- {
- typedef typename handle_unary_minus<OP,RHS>::Result Result;
- };
-
- /** @brief Selects the operator of a rebuilt node from the original operator OP and the right operand RHS; the generic case keeps OP.
- *
- * The Enable parameter defaults to void and is not referenced by the definitions visible here.
- */
- template <class OP, class RHS, class Enable=void>
- struct get_new_operator
- {
- typedef OP Result;
- };
-
- /** @brief If OP is sub_type and the right operand is a compound_node with NullType on its left, the right operand's operator RHS_OP is used. */
- template <class RHS_OP, class RHS_RHS>
- struct get_new_operator <sub_type, compound_node<NullType, RHS_OP, RHS_RHS> >
- {
- typedef RHS_OP Result;
- };
-
- /** @brief Generic case of remove_if: both Result and TmpTree are NullType if Pred<T>::value holds, and T otherwise. */
- template <class T, template<class> class Pred>
- struct remove_if
- {
- typedef typename get_type_if<NullType,T,Pred<T>::value>::Result Result;
- typedef typename get_type_if<NullType,T,Pred<T>::value>::Result TmpTree;
- };
-
- /** @brief remove_if for an element-wise modifier: applies remove_if to the wrapped expression T and re-wraps the outcome in the same modifier U.
- *
- * Pred is not evaluated on the modifier type itself.
- * NOTE(review): unlike the other remove_if definitions in this file, no TmpTree member is defined here, although the
- * compound_node specialization reads remove_if<...>::TmpTree from its operands - confirm whether a modifier can occur
- * as a direct operand of a compound_node.
- */
- template <class T, std::string (*U)(), template<class> class Pred>
- struct remove_if<elementwise_modifier_impl<T,U>,Pred >
- {
- typedef elementwise_modifier_impl<typename remove_if<T,Pred>::Result, U> Result;
- };
-
- /** @brief remove_if for a compound_node.
- *
- * If Pred holds for the node itself, both TmpTree and Result are NullType.
- * Otherwise:
- *  - TmpTree is a compound_node built from the operands' TmpTree types (no simplification applied);
- *  - Result is built from the operands' simplified results, uses the operator picked by get_new_operator
- *    (based on OP and the right operand's TmpTree), keeps the temporary flag, and is finally passed
- *    through compound_to_simple.
- */
- template <class LHS, class OP, class RHS, bool is_temporary, template<class> class Pred>
- struct remove_if<compound_node<LHS,OP,RHS,is_temporary>, Pred>
- {
-   private:
-     typedef compound_node<LHS,OP,RHS,is_temporary> Node;
-
-     typedef remove_if<LHS,Pred> PrunedLHS;
-     typedef remove_if<RHS,Pred> PrunedRHS;
-
-     typedef typename PrunedLHS::TmpTree LeftTmpTree;
-     typedef typename PrunedRHS::TmpTree RightTmpTree;
-
-     typedef typename compound_to_simple<typename PrunedLHS::Result>::Result LeftSimple;
-     typedef typename compound_to_simple<typename PrunedRHS::Result>::Result RightSimple;
-
-     typedef typename get_new_operator<OP,RightTmpTree>::Result NewOperator;
-
-     typedef compound_node<LeftTmpTree,OP,RightTmpTree> RawTree;
-     typedef typename compound_to_simple< compound_node<LeftSimple, NewOperator, RightSimple, is_temporary> >::Result SimplifiedTree;
-   public:
-     typedef typename get_type_if<NullType, RawTree, Pred<Node>::value>::Result TmpTree;
-     typedef typename get_type_if<NullType, SimplifiedTree, Pred<Node>::value>::Result Result;
- };
-
- } // namespace tree_utils
- } // namespace generator
-} // namespace viennacl
-#endif
diff --git a/viennacl/linalg/coordinate_matrix_operations.hpp b/viennacl/linalg/coordinate_matrix_operations.hpp
deleted file mode 100644
index 8dd03e8..0000000
--- a/viennacl/linalg/coordinate_matrix_operations.hpp
+++ /dev/null
@@ -1,222 +0,0 @@
-#ifndef VIENNACL_COORDINATE_MATRIX_OPERATIONS_HPP_
-#define VIENNACL_COORDINATE_MATRIX_OPERATIONS_HPP_
-
-/* =========================================================================
- Copyright (c) 2010-2012, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp at iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file coordinate_matrix_operations.hpp
- @brief Implementations of operations using coordinate_matrix
-*/
-
-#include "viennacl/forwards.h"
-#include "viennacl/ocl/device.hpp"
-#include "viennacl/ocl/handle.hpp"
-#include "viennacl/ocl/kernel.hpp"
-#include "viennacl/scalar.hpp"
-#include "viennacl/vector.hpp"
-#include "viennacl/tools/tools.hpp"
-#include "viennacl/linalg/kernels/coordinate_matrix_kernels.h"
-
-namespace viennacl
-{
- namespace linalg
- {
-
-
- // A * x
- /** @brief Returns a proxy class that represents matrix-vector multiplication with a coordinate_matrix
- *
- * This is used for the convenience expression result = prod(mat, vec);
- * No computation is carried out here: the returned vector_expression only refers to mat and vec.
- *
- * @param mat The matrix
- * @param vec The vector
- */
- template<class SCALARTYPE, unsigned int ALIGNMENT, unsigned int VECTOR_ALIGNMENT>
- vector_expression<const coordinate_matrix<SCALARTYPE, ALIGNMENT>,
- const vector<SCALARTYPE, VECTOR_ALIGNMENT>,
- op_prod > prod_impl(const coordinate_matrix<SCALARTYPE, ALIGNMENT> & mat,
- const vector<SCALARTYPE, VECTOR_ALIGNMENT> & vec)
- {
- return vector_expression<const coordinate_matrix<SCALARTYPE, ALIGNMENT>,
- const vector<SCALARTYPE, VECTOR_ALIGNMENT>,
- op_prod >(mat, vec);
- }
-
- // A * x
- /** @brief Returns a proxy class that represents matrix-vector multiplication with a coordinate_matrix
- *
- * This is used for the convenience expression result = prod(mat, vec);
- * No computation is carried out here: the returned vector_expression only refers to mat and vec.
- *
- * @param mat The matrix
- * @param vec The vector
- * @param NUM_THREADS Number of threads per work group. Accepted for fine-tuning, but not used by this overload: the value is not forwarded to the returned proxy.
- */
- template<class SCALARTYPE, unsigned int ALIGNMENT, unsigned int VECTOR_ALIGNMENT>
- viennacl::vector_expression<const viennacl::coordinate_matrix<SCALARTYPE, ALIGNMENT>,
- const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>,
- viennacl::op_prod > prod_impl(const viennacl::coordinate_matrix<SCALARTYPE, ALIGNMENT> & mat,
- const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT> & vec,
- size_t NUM_THREADS)
- {
- return viennacl::vector_expression<const viennacl::coordinate_matrix<SCALARTYPE, ALIGNMENT>,
- const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>,
- viennacl::op_prod >(mat, vec);
- }
-
- //namespace {
- /** @brief Carries out matrix-vector multiplication with a coordinate_matrix
- *
- * Implementation of the convenience expression result = prod(mat, vec);
- * The result vector is cleared first, then the "vec_mul" kernel of the coordinate_matrix kernel program is enqueued.
- *
- * @param mat    The matrix
- * @param vec    The vector; its size has to equal mat.size2()
- * @param result The result vector; its size has to equal mat.size1()
- */
- template<class TYPE, unsigned int ALIGNMENT, unsigned int VECTOR_ALIGNMENT>
- void prod_impl(const viennacl::coordinate_matrix<TYPE, ALIGNMENT> & mat,
-                const viennacl::vector<TYPE, VECTOR_ALIGNMENT> & vec,
-                viennacl::vector<TYPE, VECTOR_ALIGNMENT> & result)
- {
-   typedef viennacl::linalg::kernels::coordinate_matrix<TYPE, ALIGNMENT> KernelProgram;
-
-   assert(mat.size1() == result.size());
-   assert(mat.size2() == vec.size());
-   result.clear();
-
-   // Launch configuration: 256 threads per work group, 64 work groups.
-   // The number of work groups is hard-coded for now and gives reasonable performance in most cases.
-   unsigned int threads_per_group = 256;
-
-   viennacl::ocl::kernel & mul_kernel = viennacl::ocl::get_kernel(KernelProgram::program_name(), "vec_mul");
-   mul_kernel.local_work_size(0, threads_per_group);
-   mul_kernel.global_work_size(0, 64 * threads_per_group);
-
-   // Two local memory buffers are passed along, each holding one entry per thread of a work group.
-   viennacl::ocl::enqueue(mul_kernel(mat.handle12(), mat, mat.handle3(),
-                                     vec,
-                                     result,
-                                     viennacl::ocl::local_mem(sizeof(cl_uint)*threads_per_group),
-                                     viennacl::ocl::local_mem(sizeof(TYPE)*threads_per_group)) );
- }
- //};
-
- } //namespace linalg
-
-
-
- /** @brief Implementation of the operation v1 = A * v2, where A is a coordinate_matrix
- *
- * If v2 and v1 share the same memory handle (x = A * x), the product is first written to a separate
- * vector and assigned afterwards; otherwise the product is written to v1 directly.
- *
- * @param proxy An expression template proxy class.
- */
- template <typename SCALARTYPE, unsigned int ALIGNMENT>
- template <unsigned int MAT_ALIGNMENT>
- viennacl::vector<SCALARTYPE, ALIGNMENT> &
- viennacl::vector<SCALARTYPE, ALIGNMENT>::operator=(const viennacl::vector_expression< const coordinate_matrix<SCALARTYPE, MAT_ALIGNMENT>,
-                                                                                        const viennacl::vector<SCALARTYPE, ALIGNMENT>,
-                                                                                        viennacl::op_prod> & proxy)
- {
-   // common case: source and destination are distinct buffers
-   if (proxy.rhs().handle().get() != this->handle().get())
-   {
-     viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), *this);
-     return *this;
-   }
-
-   // special case x = A * x: go through a separate result vector
-   viennacl::vector<SCALARTYPE, ALIGNMENT> product(proxy.rhs().size());
-   viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), product);
-   *this = product;
-   return *this;
- }
-
- // v1 += A * v2
- /** @brief Implementation of the operation v1 += A * v2, where A is a coordinate_matrix
- *
- * The product is computed into a separate vector of size A.size1() and then added to v1.
- *
- * @param proxy An expression template proxy class.
- */
- template <typename SCALARTYPE, unsigned int ALIGNMENT>
- template <unsigned int MAT_ALIGNMENT>
- viennacl::vector<SCALARTYPE, ALIGNMENT> &
- viennacl::vector<SCALARTYPE, ALIGNMENT>::operator+=(const vector_expression< const coordinate_matrix<SCALARTYPE, MAT_ALIGNMENT>,
-                                                                               const vector<SCALARTYPE, ALIGNMENT>,
-                                                                               op_prod> & proxy)
- {
-   vector<SCALARTYPE, ALIGNMENT> product(proxy.lhs().size1());
-   viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), product);
-
-   *this += product;
-   return *this;
- }
-
- /** @brief Implementation of the operation v1 -= A * v2, where A is a coordinate_matrix
- *
- * The product is computed into a separate vector of size A.size1() and then subtracted from v1.
- *
- * @param proxy An expression template proxy class.
- */
- template <typename SCALARTYPE, unsigned int ALIGNMENT>
- template <unsigned int MAT_ALIGNMENT>
- viennacl::vector<SCALARTYPE, ALIGNMENT> &
- viennacl::vector<SCALARTYPE, ALIGNMENT>::operator-=(const vector_expression< const coordinate_matrix<SCALARTYPE, MAT_ALIGNMENT>,
-                                                                               const vector<SCALARTYPE, ALIGNMENT>,
-                                                                               op_prod> & proxy)
- {
-   vector<SCALARTYPE, ALIGNMENT> product(proxy.lhs().size1());
-   viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), product);
-
-   *this -= product;
-   return *this;
- }
-
-
- // member operators returning a new vector:
- /** @brief Implementation of the operation 'result = v1 + A * v2', where A is a coordinate_matrix
- *
- * The product is computed into a new vector of the same size as v1, then v1 is added to it.
- *
- * @param proxy An expression template proxy class.
- * @return A new vector holding v1 + A * v2
- */
- template <typename SCALARTYPE, unsigned int ALIGNMENT>
- template <unsigned int MAT_ALIGNMENT>
- viennacl::vector<SCALARTYPE, ALIGNMENT>
- viennacl::vector<SCALARTYPE, ALIGNMENT>::operator+(const vector_expression< const coordinate_matrix<SCALARTYPE, MAT_ALIGNMENT>,
-                                                                              const vector<SCALARTYPE, ALIGNMENT>,
-                                                                              op_prod> & proxy)
- {
-   // use the same lhs() accessor as the remainder of this function
-   assert(proxy.lhs().size1() == size());
-   vector<SCALARTYPE, ALIGNMENT> result(size());
-   viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
-   result += *this;
-   return result;
- }
-
- /** @brief Implementation of the operation 'result = v1 - A * v2', where A is a coordinate_matrix
- *
- * The product is computed into a new vector of the same size as v1, which is then overwritten by v1 minus the product.
- *
- * @param proxy An expression template proxy class.
- * @return A new vector holding v1 - A * v2
- */
- template <typename SCALARTYPE, unsigned int ALIGNMENT>
- template <unsigned int MAT_ALIGNMENT>
- viennacl::vector<SCALARTYPE, ALIGNMENT>
- viennacl::vector<SCALARTYPE, ALIGNMENT>::operator-(const vector_expression< const coordinate_matrix<SCALARTYPE, MAT_ALIGNMENT>,
-                                                                              const vector<SCALARTYPE, ALIGNMENT>,
-                                                                              op_prod> & proxy)
- {
-   // use the same lhs() accessor as the remainder of this function
-   assert(proxy.lhs().size1() == size());
-   vector<SCALARTYPE, ALIGNMENT> result(size());
-   viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
-   result = *this - result;
-   return result;
- }
-
-} //namespace viennacl
-
-
-#endif
diff --git a/viennacl/linalg/lanczos.hpp~ b/viennacl/linalg/lanczos.hpp~
deleted file mode 100644
index a4f83df..0000000
--- a/viennacl/linalg/lanczos.hpp~
+++ /dev/null
@@ -1,490 +0,0 @@
-#ifndef VIENNACL_LINALG_LANCZOS_HPP_
-#define VIENNACL_LINALG_LANCZOS_HPP_
-
-/* =========================================================================
- Copyright (c) 2010-2012, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp at iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file viennacl/linalg/lanczos.hpp
-* @brief Generic interface for the Lanczos algorithm.
-*
-* Contributed by Guenther Mader and Astrid Rupp.
-*/
-
-#include <math.h> //for sqrt()
-#include <vector>
-#include "viennacl/vector.hpp"
-#include "viennacl/compressed_matrix.hpp"
-#include "viennacl/linalg/prod.hpp"
-#include "viennacl/linalg/inner_prod.hpp"
-#include "viennacl/linalg/norm_2.hpp"
-#include "viennacl/io/matrix_market.hpp"
-#include "viennacl/linalg/bisect.hpp"
-#include <boost/random.hpp>
-#include <boost/random/mersenne_twister.hpp>
-#include <boost/numeric/ublas/matrix.hpp>
-#include <boost/numeric/ublas/matrix_proxy.hpp>
-#include <boost/numeric/ublas/matrix_expression.hpp>
-#include <boost/numeric/ublas/matrix_sparse.hpp>
-#include <boost/numeric/ublas/vector.hpp>
-#include <boost/numeric/ublas/operation.hpp>
-#include <boost/numeric/ublas/vector_expression.hpp>
-#include <boost/numeric/ublas/io.hpp>
-
-namespace viennacl
-{
- namespace linalg
- {
-
- /** @brief Option container ("tag") for the Lanczos eigenvalue routines.
- *
- * Stores the reorthogonalization method, the number of eigenvalues requested,
- * the maximum Krylov space size and the exponent used for the reorthogonalization tolerance.
- */
- class lanczos_tag
- {
-   public:
-
-     /** @brief Possible values for the reorthogonalization method */
-     enum
-     {
-       partial_reorthogonalization = 0,
-       full_reorthogonalization    = 1,
-       no_reorthogonalization      = 2
-     };
-
-     /** @brief The constructor
-     *
-     * @param factor Exponent of epsilon - tolerance for batches of Reorthogonalization
-     * @param numeig Number of eigenvalues to be returned
-     * @param met    Method for Lanczos-Algorithm: 0 for partial Reorthogonalization, 1 for full Reorthogonalization and 2 for Lanczos without Reorthogonalization
-     * @param krylov Maximal krylov-space size
-     */
-     lanczos_tag(double factor = 0.75,
-                 std::size_t numeig = 10,
-                 int met = 0,
-                 std::size_t krylov = 100)
-       : factor_(factor),
-         num_eigenvalues_(numeig),
-         method_(met),
-         krylov_size_(krylov)
-     {}
-
-     // ----- exponent of epsilon -----
-
-     /** @brief Returns the exponent */
-     double factor() const { return factor_; }
-
-     /** @brief Sets the exponent of epsilon */
-     void factor(double fct) { factor_ = fct; }
-
-     // ----- number of eigenvalues -----
-
-     /** @brief Returns the number of eigenvalues */
-     std::size_t num_eigenvalues() const { return num_eigenvalues_; }
-
-     /** @brief Sets the number of eigenvalues */
-     void num_eigenvalues(int numeig) { num_eigenvalues_ = static_cast<std::size_t>(numeig); }
-
-     // ----- Krylov space size -----
-
-     /** @brief Returns the size of the Krylov space */
-     std::size_t krylov_size() const { return krylov_size_; }
-
-     /** @brief Sets the size of the Krylov space */
-     void krylov_size(int max) { krylov_size_ = static_cast<std::size_t>(max); }
-
-     // ----- reorthogonalization method -----
-
-     /** @brief Returns the reorthogonalization method */
-     int method() const { return method_; }
-
-     /** @brief Sets the reorthogonalization method */
-     void method(int met) { method_ = met; }
-
-   private:
-     double      factor_;
-     std::size_t num_eigenvalues_;
-     int         method_;          // one of the values of the enum above
-     std::size_t krylov_size_;
- };
-
-
- /**
- * @brief Implementation of the calculation of eigenvalues using lanczos
- *
- * A pseudo-random start vector is generated on the host, copied to a vector of type VectorT and handed to the
- * Lanczos variant selected by tag.method(). The Krylov space size is the smaller of matrix.size1() and tag.krylov_size().
- *
- * @param matrix The system matrix
- * @param tag Tag with several options for the lanczos algorithm
- * @return The entries at the back of the computed eigenvalue array, last entry first. At most tag.num_eigenvalues()
- *         values are returned, and never more than are available (empty if tag.method() matches no known method).
- */
- template< typename MatrixT >
- std::vector< typename viennacl::result_of::cpu_value_type<typename MatrixT::value_type>::type >
- eig(MatrixT const & matrix, lanczos_tag const & tag)
- {
-   typedef typename viennacl::result_of::value_type<MatrixT>::type ScalarType;
-   typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type CPU_ScalarType;
-   typedef typename viennacl::result_of::vector_for_matrix<MatrixT>::type VectorT;
-
-   // random number generators for the start vector
-   boost::mt11213b mt;
-   boost::bernoulli_distribution<double> B(0.5);
-   boost::triangle_distribution<double> T(-1, 0, 1);
-
-   boost::variate_generator<boost::mt11213b&, boost::bernoulli_distribution<double> > get_B(mt, B);
-   boost::variate_generator<boost::mt11213b&, boost::triangle_distribution<double> > get_T(mt, T);
-
-   std::vector<CPU_ScalarType> eigenvalues;
-   std::size_t matrix_size = matrix.size1();
-   VectorT r(matrix_size);
-   std::vector<CPU_ScalarType> s(matrix_size);
-
-   for(std::size_t i=0; i<s.size(); ++i)
-     s[i] = 3.0 * get_B() + get_T() - 1.5;
-
-   detail::copy_vec_to_vec(s,r);
-
-   std::size_t size_krylov = (matrix_size < tag.krylov_size()) ? matrix_size
-                                                               : tag.krylov_size();
-
-   switch(tag.method())
-   {
-     case lanczos_tag::partial_reorthogonalization:
-       eigenvalues = detail::lanczosPRO(matrix, r, size_krylov, tag);
-       break;
-     case lanczos_tag::full_reorthogonalization:
-       eigenvalues = detail::lanczosFRO(matrix, r, size_krylov, tag);
-       break;
-     case lanczos_tag::no_reorthogonalization:
-       eigenvalues = detail::lanczos(matrix, r, size_krylov, tag);
-       break;
-   }
-
-   // Never read in front of the eigenvalue array: the request may exceed the Krylov space size,
-   // and the array is empty if no method was selected above.
-   std::size_t available = (eigenvalues.size() < size_krylov) ? eigenvalues.size() : size_krylov;
-   std::size_t num_returned = (tag.num_eigenvalues() < available) ? tag.num_eigenvalues() : available;
-
-   std::vector<CPU_ScalarType> largest_eigenvalues;
-   largest_eigenvalues.reserve(num_returned);
-
-   for(std::size_t i = 1; i<=num_returned; i++)
-     largest_eigenvalues.push_back(eigenvalues[available-i]);
-
-   return largest_eigenvalues;
- }
-
-
- namespace detail
- {
- /**
- * @brief Implementation of the Lanczos PRO algorithm (Lanczos with partial reorthogonalization)
- *
- * The Lanczos vectors are stored column-wise in a host matrix Q. Two rows of estimates w (indexed by i % 2) are
- * updated in every step; entries whose magnitude reaches sqrt(eps) trigger a reorthogonalization of r against
- * the corresponding column of Q and against neighbouring columns whose estimate exceeds eta = eps^factor.
- * The resulting alphas and betas are passed to bisect().
- *
- * @param A The system matrix
- * @param r Random start vector (overwritten)
- * @param size Size of krylov-space
- * @param tag Lanczos_tag with several options for the algorithm
- * @return Returns the eigenvalues (number of eigenvalues equals size of krylov-space)
- */
-
- template< typename MatrixT, typename VectorT >
- std::vector<
-   typename viennacl::result_of::cpu_value_type<typename MatrixT::value_type>::type
- >
- lanczosPRO (MatrixT const& A, VectorT & r, int size, lanczos_tag const & tag)
- {
-
-   typedef typename viennacl::result_of::value_type<MatrixT>::type ScalarType;
-   typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type CPU_ScalarType;
-
-
-   // generation of some random numbers, used for lanczos PRO algorithm
-   boost::mt11213b mt;
-   boost::normal_distribution<double> N(0, 1);
-
-   boost::variate_generator<boost::mt11213b&, boost::normal_distribution<double> > get_N(mt, N);
-
-
-   long i, j, k, index, retry, reorths;
-   std::vector<long> l_bound(size/2), u_bound(size/2);   // first/last column of each reorthogonalization batch
-   bool second_step;
-   double squ_eps, eta, temp, eps, retry_th;
-   long n = r.size();
-   std::vector< std::vector<CPU_ScalarType> > w(2, std::vector<CPU_ScalarType>(size));
-   CPU_ScalarType cpu_beta;
-
-   boost::numeric::ublas::vector<CPU_ScalarType> s(n);
-
-   VectorT t(n);
-   CPU_ScalarType inner_rt;
-   ScalarType vcl_beta;
-   ScalarType vcl_alpha;
-   std::vector<CPU_ScalarType> alphas, betas;
-   boost::numeric::ublas::matrix<CPU_ScalarType> Q(n, size);
-
-   second_step = false;
-   eps = std::numeric_limits<double>::epsilon();
-   squ_eps = sqrt(eps);
-   retry_th = 1e-2;
-   eta = exp(log(eps) * tag.factor());   // eps^factor
-   reorths = 0;
-   retry = 0;
-
-   vcl_beta = viennacl::linalg::norm_2(r);
-
-   r /= vcl_beta;
-
-   detail::copy_vec_to_vec(r,s);
-   boost::numeric::ublas::column(Q, 0) = s;
-
-   VectorT u = viennacl::linalg::prod(A, r);
-   vcl_alpha = viennacl::linalg::inner_prod(u, r);
-   alphas.push_back(vcl_alpha);
-   w[0][0] = 1;
-   betas.push_back(vcl_beta);
-
-   long batches = 0;
-   for(i = 1;i < size; i++)
-   {
-     r = u - vcl_alpha * r;
-     vcl_beta = viennacl::linalg::norm_2(r);
-
-     betas.push_back(vcl_beta);
-     r = r / vcl_beta;
-
-     // update the estimates: row 'index' is written, row 'k' holds the values of the previous step
-     index = i % 2;
-     w[index][i] = 1;
-     k = (i + 1) % 2;
-     w[index][0] = (betas[1] * w[k][1] + (alphas[0] - vcl_alpha) * w[k][0] - betas[i - 1] * w[index][0]) / vcl_beta + eps * 0.3 * get_N() * (betas[1] + vcl_beta);
-
-     for(j = 1;j < i - 1;j++)
-     {
-       w[index][j] = (betas[j + 1] * w[k][j + 1] + (alphas[j] - vcl_alpha) * w[k][j] + betas[j] * w[k][j - 1] - betas[i - 1] * w[index][j]) / vcl_beta + eps * 0.3 * get_N() * (betas[j + 1] + vcl_beta);
-     }
-     w[index][i - 1] = 0.6 * eps * n * get_N() * betas[1] / vcl_beta;
-
-     // second pass over the batches recorded in the previous step, each shrunk by one column on both sides
-     if(second_step)
-     {
-       for(j = 0;j < batches;j++)
-       {
-         l_bound[j]++;
-         u_bound[j]--;
-
-         for(k = l_bound[j];k < u_bound[j];k++)
-         {
-           detail::copy_vec_to_vec(boost::numeric::ublas::column(Q, k), t);
-           inner_rt = viennacl::linalg::inner_prod(r,t);
-           r = r - inner_rt * t;
-           w[index][k] = 1.5 * eps * get_N();
-           reorths++;
-         }
-       }
-       temp = viennacl::linalg::norm_2(r);
-       r = r / temp;
-       vcl_beta = vcl_beta * temp;
-       second_step = false;
-     }
-     batches = 0;
-
-     // look for estimates that reached sqrt(eps) and reorthogonalize around them
-     for(j = 0;j < i;j++)
-     {
-       if(fabs(w[index][j]) >= squ_eps)
-       {
-         detail::copy_vec_to_vec(boost::numeric::ublas::column(Q, j), t);
-         inner_rt = viennacl::linalg::inner_prod(r,t);
-         r = r - inner_rt * t;
-         w[index][j] = 1.5 * eps * get_N();
-         k = j - 1;
-         reorths++;
-         // extend the batch to the left while the estimates exceed eta
-         while(k >= 0 && fabs(w[index][k]) > eta)
-         {
-           detail::copy_vec_to_vec(boost::numeric::ublas::column(Q, k), t);
-           inner_rt = viennacl::linalg::inner_prod(r,t);
-           r = r - inner_rt * t;
-           w[index][k] = 1.5 * eps * get_N();
-           k--;
-           reorths++;
-         }
-         l_bound[batches] = k + 1;
-         k = j + 1;
-
-         // extend the batch to the right while the estimates exceed eta
-         while(k < i && fabs(w[index][k]) > eta)
-         {
-           detail::copy_vec_to_vec(boost::numeric::ublas::column(Q, k), t);
-           inner_rt = viennacl::linalg::inner_prod(r,t);
-           r = r - inner_rt * t;
-           w[index][k] = 1.5 * eps * get_N();
-           k++;
-           reorths++;
-         }
-         u_bound[batches] = k - 1;
-         batches++;
-         j = k;
-       }
-     }
-
-     if(batches > 0)
-     {
-       temp = viennacl::linalg::norm_2(r);
-       r = r / temp;
-       vcl_beta = vcl_beta * temp;
-       second_step = true;
-
-       // norm dropped below the retry threshold: reorthogonalize against all previous columns of Q
-       while(temp < retry_th)
-       {
-         for(j = 0;j < i;j++)
-         {
-           // the column is selected by the loop variable j; k only holds a leftover value from the batch search
-           detail::copy_vec_to_vec(boost::numeric::ublas::column(Q, j), t);
-           inner_rt = viennacl::linalg::inner_prod(r,t);
-           r = r - inner_rt * t;
-           reorths++;
-         }
-         retry++;
-         temp = viennacl::linalg::norm_2(r);
-         r = r / temp;
-         vcl_beta = vcl_beta * temp;
-       }
-     }
-
-     detail::copy_vec_to_vec(r,s);
-     boost::numeric::ublas::column(Q, i) = s;
-
-     // u = A * r - beta * q_{i-1}
-     cpu_beta = vcl_beta;
-     s = - cpu_beta * boost::numeric::ublas::column(Q, i - 1);
-     detail::copy_vec_to_vec(s, u);
-     u += viennacl::linalg::prod(A, r);
-     vcl_alpha = viennacl::linalg::inner_prod(u, r);
-     alphas.push_back(vcl_alpha);
-   }
-
-   return bisect(alphas, betas);
-
- }
-
-
- /**
- * @brief Implementation of the lanczos algorithm without reorthogonalization
- *
- * Every iteration normalizes r, stores it as column of the host matrix Q, and records one alpha
- * (inner product of u and r) and one beta (norm of r before normalization). The collected
- * coefficients are passed to bisect().
- *
- * @param A The system matrix
- * @param r Random start vector (overwritten)
- * @param size Size of krylov-space
- * @param tag Lanczos_tag with several options for the algorithm (not read by this variant)
- * @return Returns the eigenvalues (number of eigenvalues equals size of krylov-space)
- */
- template< typename MatrixT, typename VectorT >
- std::vector<
-   typename viennacl::result_of::cpu_value_type<typename MatrixT::value_type>::type
- >
- lanczos (MatrixT const& A, VectorT & r, int size, lanczos_tag const & tag)
- {
-   typedef typename viennacl::result_of::value_type<MatrixT>::type ScalarType;
-   typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type CPU_ScalarType;
-
-   long n = r.size();
-
-   // host-side storage
-   std::vector<CPU_ScalarType> alphas, betas;
-   boost::numeric::ublas::vector<CPU_ScalarType> host_r(r.size()), u_zero(n), q_col(n);
-   boost::numeric::ublas::matrix<CPU_ScalarType> Q(n, size);
-   CPU_ScalarType r_norm;
-
-   // work vectors and scalars of the vector/matrix backend
-   VectorT u(n), t(n);
-   ScalarType vcl_beta;
-   ScalarType vcl_alpha;
-
-   // start with u = 0
-   u_zero = boost::numeric::ublas::zero_vector<CPU_ScalarType>(n);
-   detail::copy_vec_to_vec(u_zero, u);
-   r_norm = norm_2(r);
-
-   for(long step = 0; step < size; step++)
-   {
-     r /= r_norm;
-     vcl_beta = r_norm;
-
-     // keep the normalized vector as column 'step' of Q
-     detail::copy_vec_to_vec(r,host_r);
-     boost::numeric::ublas::column(Q, step) = host_r;
-
-     u += prod(A, r);
-     vcl_alpha = inner_prod(u, r);
-     r = u - vcl_alpha * r;
-     r_norm = norm_2(r);
-
-     // u = -|r| * q_step for the next iteration
-     q_col = boost::numeric::ublas::column(Q, step);
-     detail::copy_vec_to_vec(q_col, t);
-     u = - r_norm * t;
-
-     alphas.push_back(vcl_alpha);
-     betas.push_back(vcl_beta);
-     host_r.clear();
-   }
-
-   return bisect(alphas, betas);
- }
-
- /**
- * @brief Implementation of the Lanczos FRO algorithm (Lanczos with full reorthogonalization)
- *
- * In every iteration r is orthogonalized against all columns of Q stored so far, normalized again and
- * stored as the next column of Q. The collected alphas and betas are passed to bisect().
- *
- * @param A The system matrix
- * @param r Random start vector (overwritten)
- * @param size Size of krylov-space
- * @param tag Lanczos_tag with several options for the algorithm (not read by this variant)
- * @return Returns the eigenvalues (number of eigenvalues equals size of krylov-space)
- */
- template< typename MatrixT, typename VectorT >
- std::vector<
-   typename viennacl::result_of::cpu_value_type<typename MatrixT::value_type>::type
- >
- lanczosFRO (MatrixT const& A, VectorT & r, int size, lanczos_tag const & tag)
- {
-
-   typedef typename viennacl::result_of::value_type<MatrixT>::type ScalarType;
-   typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type CPU_ScalarType;
-
-   CPU_ScalarType temp;
-   CPU_ScalarType norm;
-   ScalarType vcl_beta;
-   ScalarType vcl_alpha;
-   std::vector<CPU_ScalarType> alphas, betas;
-   long n = r.size();
-   VectorT u(n), t(n);
-   ScalarType inner_rt;
-   boost::numeric::ublas::vector<CPU_ScalarType> u_zero(n), s(r.size()), q(n);
-   boost::numeric::ublas::matrix<CPU_ScalarType> Q(n, size);
-
-   long reorths = 0;
-
-   // u is accumulated into in the first iteration, so set it to zero explicitly
-   // (same initialization as in the variant without reorthogonalization)
-   u_zero = boost::numeric::ublas::zero_vector<CPU_ScalarType>(n);
-   detail::copy_vec_to_vec(u_zero, u);
-   norm = norm_2(r);
-
-
-   for(long i = 0; i < size; i++)
-   {
-     r /= norm;
-
-     // orthogonalize r against all Lanczos vectors stored so far
-     for(long j = 0; j < i; j++)
-     {
-       q = boost::numeric::ublas::column(Q, j);
-       detail::copy_vec_to_vec(q, t);
-       inner_rt = viennacl::linalg::inner_prod(r,t);
-       r = r - inner_rt * t;
-       reorths++;
-     }
-     // renormalize; beta accounts for both normalizations
-     temp = viennacl::linalg::norm_2(r);
-     r = r / temp;
-     vcl_beta = temp * norm;
-     detail::copy_vec_to_vec(r,s);
-     boost::numeric::ublas::column(Q, i) = s;
-
-     u += viennacl::linalg::prod(A, r);
-     vcl_alpha = viennacl::linalg::inner_prod(u, r);
-     r = u - vcl_alpha * r;
-     norm = viennacl::linalg::norm_2(r);
-     // u = -|r| * q_i for the next iteration
-     q = boost::numeric::ublas::column(Q, i);
-     detail::copy_vec_to_vec(q, t);
-     u = - norm * t;
-     alphas.push_back(vcl_alpha);
-     betas.push_back(vcl_beta);
-   }
-
-   return bisect(alphas, betas);
- }
-
- } // end namespace detail
-
- } // end namespace linalg
-} // end namespace viennacl
-#endif
\ No newline at end of file
diff --git a/viennacl/tools/matrix_kernel_class_deducer.hpp b/viennacl/tools/matrix_kernel_class_deducer.hpp
deleted file mode 100644
index b942c51..0000000
--- a/viennacl/tools/matrix_kernel_class_deducer.hpp
+++ /dev/null
@@ -1,73 +0,0 @@
-#ifndef VIENNACL_TOOLS_MATRIX_KERNEL_CLASS_DEDUCER_HPP_
-#define VIENNACL_TOOLS_MATRIX_KERNEL_CLASS_DEDUCER_HPP_
-
-/* =========================================================================
- Copyright (c) 2010-2012, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp at iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file matrix_kernel_class_deducer.hpp
- @brief Implementation of a helper meta class for deducing the correct kernels for the supplied matrix
-*/
-
-#include <string>
-#include <fstream>
-#include <sstream>
-#include "viennacl/forwards.h"
-#include "viennacl/linalg/kernels/matrix_col_kernels.h"
-#include "viennacl/linalg/kernels/matrix_row_kernels.h"
-
-#include <vector>
-#include <map>
-
-namespace viennacl
-{
- namespace tools
- {
- /** @brief Implementation of a helper meta class for deducing the correct kernels for the supplied matrix
- *
- * The generic case is empty (no ResultType member); the specializations define ResultType as the kernel class to use.
- */
- template <typename MatrixType1>
- struct MATRIX_KERNEL_CLASS_DEDUCER
- {};
-
- /** @brief Row-major viennacl::matrix: ResultType is the matrix_row kernel class for the same scalar type and alignment. */
- template <typename SCALARTYPE, unsigned int ALIGNMENT>
- struct MATRIX_KERNEL_CLASS_DEDUCER< viennacl::matrix<SCALARTYPE, viennacl::row_major, ALIGNMENT> >
- {
- typedef viennacl::linalg::kernels::matrix_row<SCALARTYPE, ALIGNMENT> ResultType;
- };
-
- /** @brief Column-major viennacl::matrix: ResultType is the matrix_col kernel class for the same scalar type and alignment. */
- template <typename SCALARTYPE, unsigned int ALIGNMENT>
- struct MATRIX_KERNEL_CLASS_DEDUCER< viennacl::matrix<SCALARTYPE, viennacl::column_major, ALIGNMENT> >
- {
- typedef viennacl::linalg::kernels::matrix_col<SCALARTYPE, ALIGNMENT> ResultType;
- };
-
- //support for matrix range:
- /** @brief A matrix_range<T> uses the kernel class deduced for the underlying type T. */
- template <typename T>
- struct MATRIX_KERNEL_CLASS_DEDUCER< viennacl::matrix_range<T> >
- {
- typedef typename MATRIX_KERNEL_CLASS_DEDUCER<T>::ResultType ResultType;
- };
-
- //support for matrix slice:
- /** @brief A matrix_slice<T> uses the kernel class deduced for the underlying type T. */
- template <typename T>
- struct MATRIX_KERNEL_CLASS_DEDUCER< viennacl::matrix_slice<T> >
- {
- typedef typename MATRIX_KERNEL_CLASS_DEDUCER<T>::ResultType ResultType;
- };
-
- }
-
-}
-
-#endif
diff --git a/viennacl/tools/matrix_prod_kernel_class_deducer.hpp b/viennacl/tools/matrix_prod_kernel_class_deducer.hpp
deleted file mode 100644
index 3c3b6f9..0000000
--- a/viennacl/tools/matrix_prod_kernel_class_deducer.hpp
+++ /dev/null
@@ -1,171 +0,0 @@
-#ifndef VIENNACL_TOOLS_MATRIX_PROD_KERNEL_CLASS_DEDUCER_HPP_
-#define VIENNACL_TOOLS_MATRIX_PROD_KERNEL_CLASS_DEDUCER_HPP_
-
-/* =========================================================================
- Copyright (c) 2010-2012, Institute for Microelectronics,
- Institute for Analysis and Scientific Computing,
- TU Wien.
-
- -----------------
- ViennaCL - The Vienna Computing Library
- -----------------
-
- Project Head: Karl Rupp rupp at iue.tuwien.ac.at
-
- (A list of authors and contributors can be found in the PDF manual)
-
- License: MIT (X11), see file LICENSE in the base directory
-============================================================================= */
-
-/** @file matrix_prod_kernel_class_deducer.hpp
- @brief Implementation of a helper meta class for deducing the correct kernels for matrix-matrix products
-*/
-
-#include <string>
-#include <fstream>
-#include <sstream>
-#include "viennacl/forwards.h"
-#include "viennacl/linalg/kernels/matrix_prod_col_col_col_kernels.h"
-#include "viennacl/linalg/kernels/matrix_prod_col_col_row_kernels.h"
-#include "viennacl/linalg/kernels/matrix_prod_col_row_col_kernels.h"
-#include "viennacl/linalg/kernels/matrix_prod_col_row_row_kernels.h"
-#include "viennacl/linalg/kernels/matrix_prod_row_col_col_kernels.h"
-#include "viennacl/linalg/kernels/matrix_prod_row_col_row_kernels.h"
-#include "viennacl/linalg/kernels/matrix_prod_row_row_col_kernels.h"
-#include "viennacl/linalg/kernels/matrix_prod_row_row_row_kernels.h"
-
-#include <vector>
-#include <map>
-
-namespace viennacl
-{
- namespace tools
- {
- namespace detail
- {
- template <typename MatrixType>
- struct extract_matrix
- {
- typedef typename MatrixType::ERROR_UNKNOWN_MATRIX_TYPE_PROVIDED error_type;
- };
-
- template <typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
- struct extract_matrix < viennacl::matrix<SCALARTYPE, F, ALIGNMENT> >
- {
- typedef viennacl::matrix<SCALARTYPE, F, ALIGNMENT> type;
- };
-
- template <typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
- struct extract_matrix < const viennacl::matrix<SCALARTYPE, F, ALIGNMENT> >
- {
- typedef viennacl::matrix<SCALARTYPE, F, ALIGNMENT> type;
- };
-
-
- template <typename MatrixType>
- struct extract_matrix < viennacl::matrix_range<MatrixType> >
- {
- typedef typename extract_matrix<MatrixType>::type type;
- };
-
- template <typename MatrixType>
- struct extract_matrix < const viennacl::matrix_range<MatrixType> >
- {
- typedef typename extract_matrix<MatrixType>::type type;
- };
-
- template <typename MatrixType>
- struct extract_matrix < viennacl::matrix_slice<MatrixType> >
- {
- typedef typename extract_matrix<MatrixType>::type type;
- };
-
- template <typename MatrixType>
- struct extract_matrix < const viennacl::matrix_slice<MatrixType> >
- {
- typedef typename extract_matrix<MatrixType>::type type;
- };
-
- }
-
-
-
- /** @brief deduces kernel type for C=A*B, where A, B, C are MatrixType1, MatrixType2 and MatrixType3 respectively */
- template <typename MatrixType1, typename MatrixType2, typename MatrixType3>
- struct MATRIX_PROD_KERNEL_CLASS_DEDUCER
- {
- typedef typename MATRIX_PROD_KERNEL_CLASS_DEDUCER< typename detail::extract_matrix<MatrixType1>::type,
- typename detail::extract_matrix<MatrixType2>::type,
- typename detail::extract_matrix<MatrixType3>::type>::ResultType ResultType;
- };
-
- template <typename SCALARTYPE, unsigned int ALIGNMENT>
- struct MATRIX_PROD_KERNEL_CLASS_DEDUCER< viennacl::matrix<SCALARTYPE, viennacl::row_major, ALIGNMENT>,
- viennacl::matrix<SCALARTYPE, viennacl::row_major, ALIGNMENT>,
- viennacl::matrix<SCALARTYPE, viennacl::row_major, ALIGNMENT> >
- {
- typedef viennacl::linalg::kernels::matrix_prod_row_row_row<SCALARTYPE, ALIGNMENT> ResultType;
- };
-
- template <typename SCALARTYPE, unsigned int ALIGNMENT>
- struct MATRIX_PROD_KERNEL_CLASS_DEDUCER< viennacl::matrix<SCALARTYPE, viennacl::row_major, ALIGNMENT>,
- viennacl::matrix<SCALARTYPE, viennacl::row_major, ALIGNMENT>,
- viennacl::matrix<SCALARTYPE, viennacl::column_major, ALIGNMENT> >
- {
- typedef viennacl::linalg::kernels::matrix_prod_row_row_col<SCALARTYPE, ALIGNMENT> ResultType;
- };
-
- template <typename SCALARTYPE, unsigned int ALIGNMENT>
- struct MATRIX_PROD_KERNEL_CLASS_DEDUCER< viennacl::matrix<SCALARTYPE, viennacl::row_major, ALIGNMENT>,
- viennacl::matrix<SCALARTYPE, viennacl::column_major, ALIGNMENT>,
- viennacl::matrix<SCALARTYPE, viennacl::row_major, ALIGNMENT> >
- {
- typedef viennacl::linalg::kernels::matrix_prod_row_col_row<SCALARTYPE, ALIGNMENT> ResultType;
- };
-
- template <typename SCALARTYPE, unsigned int ALIGNMENT>
- struct MATRIX_PROD_KERNEL_CLASS_DEDUCER< viennacl::matrix<SCALARTYPE, viennacl::row_major, ALIGNMENT>,
- viennacl::matrix<SCALARTYPE, viennacl::column_major, ALIGNMENT>,
- viennacl::matrix<SCALARTYPE, viennacl::column_major, ALIGNMENT> >
- {
- typedef viennacl::linalg::kernels::matrix_prod_row_col_col<SCALARTYPE, ALIGNMENT> ResultType;
- };
-
-
- template <typename SCALARTYPE, unsigned int ALIGNMENT>
- struct MATRIX_PROD_KERNEL_CLASS_DEDUCER< viennacl::matrix<SCALARTYPE, viennacl::column_major, ALIGNMENT>,
- viennacl::matrix<SCALARTYPE, viennacl::row_major, ALIGNMENT>,
- viennacl::matrix<SCALARTYPE, viennacl::row_major, ALIGNMENT> >
- {
- typedef viennacl::linalg::kernels::matrix_prod_col_row_row<SCALARTYPE, ALIGNMENT> ResultType;
- };
-
- template <typename SCALARTYPE, unsigned int ALIGNMENT>
- struct MATRIX_PROD_KERNEL_CLASS_DEDUCER< viennacl::matrix<SCALARTYPE, viennacl::column_major, ALIGNMENT>,
- viennacl::matrix<SCALARTYPE, viennacl::row_major, ALIGNMENT>,
- viennacl::matrix<SCALARTYPE, viennacl::column_major, ALIGNMENT> >
- {
- typedef viennacl::linalg::kernels::matrix_prod_col_row_col<SCALARTYPE, ALIGNMENT> ResultType;
- };
-
- template <typename SCALARTYPE, unsigned int ALIGNMENT>
- struct MATRIX_PROD_KERNEL_CLASS_DEDUCER< viennacl::matrix<SCALARTYPE, viennacl::column_major, ALIGNMENT>,
- viennacl::matrix<SCALARTYPE, viennacl::column_major, ALIGNMENT>,
- viennacl::matrix<SCALARTYPE, viennacl::row_major, ALIGNMENT> >
- {
- typedef viennacl::linalg::kernels::matrix_prod_col_col_row<SCALARTYPE, ALIGNMENT> ResultType;
- };
-
- template <typename SCALARTYPE, unsigned int ALIGNMENT>
- struct MATRIX_PROD_KERNEL_CLASS_DEDUCER< viennacl::matrix<SCALARTYPE, viennacl::column_major, ALIGNMENT>,
- viennacl::matrix<SCALARTYPE, viennacl::column_major, ALIGNMENT>,
- viennacl::matrix<SCALARTYPE, viennacl::column_major, ALIGNMENT> >
- {
- typedef viennacl::linalg::kernels::matrix_prod_col_col_col<SCALARTYPE, ALIGNMENT> ResultType;
- };
-
- }
-
-}
-
-#endif
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/viennacl.git
More information about the debian-science-commits
mailing list