[arrayfire] 17/284: Merge branch 'devel' into async
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Sun Feb 7 18:59:14 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/experimental
in repository arrayfire.
commit 5d428dfda7ae8098689885eedb662c0d41ace192
Merge: 49f0cce 46a45b5
Author: pradeep <pradeep at arrayfire.com>
Date: Mon Nov 16 18:15:31 2015 -0500
Merge branch 'devel' into async
ArrayFireConfig.cmake.in | 30 +-
CMakeLists.txt | 38 +-
CMakeModules/FindCBLAS.cmake | 25 +
CMakeModules/FindLAPACKE.cmake | 144 +-
CMakeModules/FindOpenCL.cmake | 34 +-
CMakeModules/Version.cmake | 21 +-
CMakeModules/build_clBLAS.cmake | 2 +-
CMakeModules/build_forge.cmake | 2 +-
CMakeModules/osx_install/OSXInstaller.cmake | 16 +-
CMakeModules/osx_install/cpu_scripts/postinstall | 6 +-
CMakeModules/osx_install/cuda_scripts/postinstall | 4 +-
CMakeModules/osx_install/distribution.dist | 35 +-
.../osx_install/opencl_scripts/postinstall | 4 +-
CMakeModules/version.h.in | 3 +-
COPYRIGHT.md | 12 +-
LICENSES/OpenSIFT License.txt | 57 +
README.md | 14 +-
assets | 2 +-
docs/CMakeLists.txt | 2 +-
docs/details/algorithm.dox | 38 +-
docs/details/array.dox | 13 +-
docs/details/backend.dox | 66 +
docs/details/data.dox | 44 +-
docs/details/image.dox | 136 +-
docs/details/index.dox | 25 +
docs/details/lapack.dox | 22 +
docs/details/util.dox | 148 +
docs/details/vision.dox | 76 +
docs/doxygen.mk | 3 +-
docs/layout.xml | 2 +
docs/pages/INSTALL.md | 166 +-
docs/pages/README.md | 40 +-
docs/pages/getting_started.md | 2 +
docs/pages/gfor.md | 32 -
docs/pages/matrix_manipulation.md | 298 +-
docs/pages/release_notes.md | 323 +-
docs/pages/unified_backend.md | 212 +
docs/pages/using_on_linux.md | 200 +-
docs/pages/using_on_osx.md | 233 +-
docs/pages/using_on_windows.md | 304 +-
examples/CMakeLists.txt | 27 +-
examples/CMakeModules/FindOpenCL.cmake | 34 +-
examples/common/progress.h | 2 +-
examples/financial/heston_model.cpp | 20 +-
examples/graphics/plot3.cpp | 58 +
examples/graphics/surface.cpp | 55 +
examples/image_processing/image_editing.cpp | 2 +-
examples/lin_algebra/svd.cpp | 55 +
examples/pde/swe.cpp | 86 +
examples/unified/basic.cpp | 78 +
include/af/algorithm.h | 17 +-
include/af/arith.h | 31 +-
include/af/array.h | 117 +-
include/af/backend.h | 105 +
include/af/complex.h | 51 +-
include/af/cuda.h | 70 +
include/af/data.h | 145 +-
include/af/defines.h | 110 +-
include/af/device.h | 4 +
include/af/dim4.hpp | 6 +-
include/af/graphics.h | 125 +-
include/af/image.h | 428 +-
include/af/index.h | 82 +-
include/af/lapack.h | 55 +
include/af/macros.h | 24 +
include/af/opencl.h | 426 +-
include/af/signal.h | 42 +
include/af/statistics.h | 28 +-
include/af/traits.hpp | 24 +
include/af/util.h | 197 +-
include/af/vision.h | 296 +-
include/arrayfire.h | 79 +-
src/api/c/approx.cpp | 30 +-
src/api/c/assign.cpp | 88 +-
src/api/c/bilateral.cpp | 2 +
src/api/c/binary.cpp | 8 +
src/api/c/cast.cpp | 2 +
src/api/c/colorspace.cpp | 60 +-
src/api/c/convolve.cpp | 52 +-
src/api/c/corrcoef.cpp | 2 +
src/api/c/covariance.cpp | 12 +-
src/api/c/data.cpp | 53 +-
src/api/c/device.cpp | 60 +-
src/api/c/diff.cpp | 4 +
src/api/c/dog.cpp | 13 +-
src/api/c/err_common.cpp | 6 +-
src/api/c/fast.cpp | 2 +
src/api/c/fftconvolve.cpp | 4 +
src/api/c/filters.cpp | 2 +
src/api/c/flip.cpp | 2 +
src/api/c/graphics_common.cpp | 60 +-
src/api/c/graphics_common.hpp | 18 +-
src/api/c/handle.hpp | 3 +
src/api/c/hist.cpp | 5 +-
src/api/c/histeq.cpp | 4 +
src/api/c/histogram.cpp | 24 +-
src/api/c/homography.cpp | 88 +
src/api/c/image.cpp | 21 +-
src/api/c/imageio.cpp | 513 +-
src/api/c/imageio2.cpp | 389 +
src/api/c/imageio_helper.h | 102 +
src/api/c/implicit.cpp | 6 +
src/api/c/index.cpp | 41 +-
src/api/c/join.cpp | 4 +
src/api/c/match_template.cpp | 2 +
src/api/c/mean.cpp | 131 +-
src/api/c/meanshift.cpp | 4 +
src/api/c/median.cpp | 11 +-
src/api/c/moddims.cpp | 2 +
src/api/c/morph.cpp | 4 +
src/api/c/nearest_neighbour.cpp | 13 +-
src/api/c/plot.cpp | 5 +-
src/api/c/plot3.cpp | 113 +
src/api/c/print.cpp | 113 +-
src/api/c/reduce.cpp | 16 +
src/api/c/regions.cpp | 2 +
src/api/c/reorder.cpp | 2 +
src/api/c/replace.cpp | 113 +
src/api/c/resize.cpp | 2 +
src/api/c/rgb_gray.cpp | 2 +
src/api/c/rotate.cpp | 2 +
src/api/c/sat.cpp | 2 +
src/api/c/scan.cpp | 2 +
src/api/c/select.cpp | 162 +
src/api/c/set.cpp | 12 +
src/api/c/shift.cpp | 2 +
src/api/c/sift.cpp | 132 +
src/api/c/sobel.cpp | 2 +
src/api/c/sort.cpp | 16 +
src/api/c/stats.h | 45 +-
src/api/c/stdev.cpp | 24 +-
src/api/c/stream.cpp | 364 +
src/api/c/surface.cpp | 135 +
src/api/c/susan.cpp | 9 +-
src/api/c/svd.cpp | 128 +
src/api/c/tile.cpp | 2 +
src/api/c/transform.cpp | 2 +
src/api/c/transpose.cpp | 20 +-
src/api/c/type_util.cpp | 10 +-
src/api/c/unary.cpp | 89 +-
src/api/c/unwrap.cpp | 33 +-
src/api/c/util.cpp | 81 +
src/api/c/var.cpp | 18 +-
src/api/c/where.cpp | 2 +
src/api/c/wrap.cpp | 80 +
src/api/c/ycbcr_rgb.cpp | 160 +
src/api/cpp/array.cpp | 67 +-
src/api/cpp/corrcoef.cpp | 4 +
src/api/cpp/data.cpp | 34 +
src/api/cpp/device.cpp | 32 +
src/api/cpp/dog.cpp | 2 +-
src/api/cpp/features.cpp | 1 -
src/api/cpp/graphics.cpp | 21 +
src/api/cpp/homography.cpp | 32 +
src/api/cpp/imageio.cpp | 31 +
src/api/cpp/index.cpp | 2 +-
src/api/cpp/lapack.cpp | 18 +
src/api/cpp/mean.cpp | 4 +
src/api/cpp/median.cpp | 6 +-
src/api/cpp/reduce.cpp | 6 +
src/api/cpp/seq.cpp | 10 +-
src/api/cpp/sift.cpp | 51 +
src/api/cpp/stdev.cpp | 4 +
src/api/cpp/timing.cpp | 8 +-
src/api/cpp/unwrap.cpp | 5 +-
src/api/cpp/util.cpp | 41 +-
src/api/cpp/var.cpp | 2 +
src/api/cpp/{unwrap.cpp => wrap.cpp} | 13 +-
src/api/cpp/{dog.cpp => ycbcr_rgb.cpp} | 15 +-
src/api/unified/CMakeLists.txt | 72 +
src/api/unified/algorithm.cpp | 148 +
src/api/unified/arith.cpp | 102 +
src/api/unified/array.cpp | 108 +
src/api/unified/blas.cpp | 40 +
src/api/unified/data.cpp | 180 +
src/api/unified/device.cpp | 140 +
src/api/unified/features.cpp | 44 +
src/api/unified/graphics.cpp | 83 +
src/api/unified/image.cpp | 252 +
src/api/unified/index.cpp | 54 +
src/api/unified/lapack.cpp | 98 +
src/api/unified/signal.cpp | 143 +
src/api/unified/statistics.cpp | 96 +
src/api/unified/symbol_manager.cpp | 223 +
src/api/unified/symbol_manager.hpp | 108 +
src/api/unified/util.cpp | 63 +
src/api/unified/vision.cpp | 86 +
src/backend/ArrayInfo.cpp | 82 +
src/backend/ArrayInfo.hpp | 37 +-
src/backend/cblas.cpp | 105 +-
src/backend/cpu/Array.cpp | 71 +-
src/backend/cpu/CMakeLists.txt | 5 +-
src/backend/cpu/TNJ/BinaryNode.hpp | 19 +-
src/backend/cpu/TNJ/BufferNode.hpp | 45 +-
src/backend/cpu/TNJ/Node.hpp | 10 +-
src/backend/cpu/TNJ/ScalarNode.hpp | 9 +-
src/backend/cpu/TNJ/UnaryNode.hpp | 15 +-
src/backend/cpu/approx.cpp | 206 +-
src/backend/cpu/assign.cpp | 2 +
src/backend/cpu/bilateral.cpp | 2 +
src/backend/cpu/blas.cpp | 37 +-
src/backend/cpu/convolve.cpp | 4 +
src/backend/cpu/copy.cpp | 54 +-
src/backend/cpu/diagonal.cpp | 2 +
src/backend/cpu/diff.cpp | 2 +
src/backend/cpu/fast.cpp | 2 +
src/backend/cpu/fftconvolve.cpp | 4 +
src/backend/cpu/hist_graphics.cpp | 2 +
src/backend/cpu/histogram.cpp | 12 +-
src/backend/cpu/histogram.hpp | 2 +-
src/backend/cpu/homography.cpp | 383 +
src/backend/cpu/{histogram.hpp => homography.hpp} | 10 +-
src/backend/cpu/identity.cpp | 4 +-
src/backend/cpu/image.cpp | 2 +
src/backend/cpu/index.cpp | 2 +
src/backend/cpu/iota.cpp | 2 +
src/backend/cpu/ireduce.cpp | 4 +
src/backend/cpu/join.cpp | 4 +
src/backend/cpu/lookup.cpp | 6 +
src/backend/cpu/match_template.cpp | 2 +
src/backend/cpu/math.cpp | 3 -
src/backend/cpu/math.hpp | 3 -
src/backend/cpu/meanshift.cpp | 4 +
src/backend/cpu/medfilt.cpp | 2 +
src/backend/cpu/memory.cpp | 2 +
src/backend/cpu/morph.cpp | 2 +
src/backend/cpu/nearest_neighbour.cpp | 13 +-
src/backend/cpu/platform.cpp | 192 +-
src/backend/cpu/platform.hpp | 2 +
src/backend/cpu/plot.cpp | 2 +
src/backend/cpu/{plot.cpp => plot3.cpp} | 12 +-
src/backend/{opencl/plot.hpp => cpu/plot3.hpp} | 5 +-
src/backend/cpu/random.cpp | 26 +-
src/backend/cpu/range.cpp | 2 +
src/backend/cpu/reduce.cpp | 38 +-
src/backend/cpu/regions.cpp | 2 +
src/backend/cpu/reorder.cpp | 2 +
src/backend/cpu/resize.cpp | 2 +
src/backend/cpu/rotate.cpp | 2 +
src/backend/cpu/scan.cpp | 2 +
src/backend/cpu/select.cpp | 145 +
src/backend/cpu/{unwrap.hpp => select.hpp} | 11 +-
src/backend/cpu/set.cpp | 4 +
src/backend/cpu/shift.cpp | 2 +
src/backend/cpu/sift.cpp | 67 +
src/backend/cpu/sift.hpp | 27 +
src/backend/cpu/sift_nonfree.hpp | 1193 ++
src/backend/cpu/sobel.cpp | 2 +
src/backend/cpu/sort.cpp | 4 +
src/backend/cpu/sort_by_key.cpp | 9 +
src/backend/cpu/sort_index.cpp | 4 +
src/backend/cpu/{plot.cpp => surface.cpp} | 12 +-
src/backend/{opencl/plot.hpp => cpu/surface.hpp} | 5 +-
src/backend/cpu/susan.cpp | 21 +-
src/backend/cpu/svd.cpp | 121 +
src/backend/cpu/{histogram.hpp => svd.hpp} | 8 +-
src/backend/cpu/tile.cpp | 2 +
src/backend/cpu/transform.cpp | 2 +
src/backend/cpu/transpose.cpp | 2 +
src/backend/cpu/triangle.cpp | 2 +
src/backend/cpu/types.hpp | 1 +
src/backend/cpu/unwrap.cpp | 46 +-
src/backend/cpu/unwrap.hpp | 3 +-
src/backend/cpu/where.cpp | 2 +
src/backend/cpu/wrap.cpp | 124 +
src/backend/cpu/{unwrap.hpp => wrap.hpp} | 11 +-
src/backend/cuda/Array.cpp | 14 +-
src/backend/cuda/CMakeLists.txt | 47 +-
src/backend/cuda/JIT/arith.cu | 2 +
src/backend/cuda/JIT/cast.cu | 20 +-
src/backend/cuda/JIT/exp.cu | 4 +
src/backend/cuda/JIT/hyper.cu | 2 +
src/backend/cuda/JIT/logic.cu | 8 +
src/backend/cuda/JIT/numeric.cu | 42 +-
src/backend/cuda/JIT/trig.cu | 4 +
src/backend/cuda/JIT/types.h | 1 +
src/backend/cuda/all.cu | 2 +
src/backend/cuda/any.cu | 2 +
src/backend/cuda/assign.cu | 8 +-
src/backend/cuda/bilateral.cu | 2 +
src/backend/cuda/blas.cpp | 4 +-
src/backend/cuda/cholesky.cu | 28 +
src/backend/cuda/convolve.cpp | 4 +
src/backend/cuda/copy.cu | 38 +-
src/backend/cuda/count.cu | 2 +
src/backend/cuda/cpu_lapack/cpu_cholesky.cpp | 109 +
.../{unwrap.hpp => cpu_lapack/cpu_cholesky.hpp} | 10 +-
src/backend/cuda/cpu_lapack/cpu_inverse.cpp | 92 +
.../plot.hpp => cuda/cpu_lapack/cpu_inverse.hpp} | 14 +-
src/backend/cuda/cpu_lapack/cpu_lu.cpp | 197 +
.../cuda/{unwrap.hpp => cpu_lapack/cpu_lu.hpp} | 10 +-
src/backend/cuda/cpu_lapack/cpu_qr.cpp | 160 +
.../cuda/{unwrap.hpp => cpu_lapack/cpu_qr.hpp} | 10 +-
src/backend/cuda/cpu_lapack/cpu_solve.cpp | 206 +
.../cuda/{unwrap.hpp => cpu_lapack/cpu_solve.hpp} | 11 +-
src/backend/cuda/cpu_lapack/cpu_svd.cpp | 153 +
.../cuda/{histogram.hpp => cpu_lapack/cpu_svd.hpp} | 10 +-
src/backend/cuda/cpu_lapack/cpu_triangle.hpp | 52 +
src/backend/cuda/cpu_lapack/lapack_helper.hpp | 35 +
src/backend/cuda/diagonal.cu | 2 +
src/backend/cuda/diff.cu | 2 +
src/backend/cuda/dilate.cu | 2 +
src/backend/cuda/dilate3d.cu | 2 +
src/backend/cuda/driver.cpp | 2 +-
src/backend/cuda/erode.cu | 2 +
src/backend/cuda/erode3d.cu | 2 +
src/backend/cuda/fast.cu | 2 +
src/backend/cuda/fast_pyramid.cu | 2 +
src/backend/cuda/fftconvolve.cu | 4 +
src/backend/cuda/hist_graphics.cu | 2 +
src/backend/cuda/histogram.cu | 23 +-
src/backend/cuda/histogram.hpp | 2 +-
src/backend/cuda/homography.cu | 79 +
src/backend/cuda/{histogram.hpp => homography.hpp} | 10 +-
src/backend/cuda/identity.cu | 2 +
src/backend/cuda/image.cu | 2 +
src/backend/cuda/index.cu | 6 +-
src/backend/cuda/interopManager.cu | 34 +
src/backend/cuda/interopManager.hpp | 2 +
src/backend/cuda/inverse.cu | 22 +
src/backend/cuda/iota.cu | 2 +
src/backend/cuda/ireduce.cu | 4 +
src/backend/cuda/jit.cpp | 2 +
src/backend/cuda/join.cu | 22 +-
src/backend/cuda/kernel/approx.hpp | 121 +-
src/backend/cuda/kernel/atomics.hpp | 59 +
src/backend/cuda/kernel/convolve.cu | 16 +-
src/backend/cuda/kernel/convolve_separable.cu | 12 +-
src/backend/cuda/kernel/fast.hpp | 24 +-
src/backend/cuda/kernel/fftconvolve.hpp | 18 +-
src/backend/cuda/kernel/harris.hpp | 7 +-
src/backend/cuda/kernel/histogram.hpp | 33 +-
src/backend/cuda/kernel/homography.hpp | 699 ++
src/backend/cuda/kernel/ireduce.hpp | 24 +-
src/backend/cuda/kernel/memcopy.hpp | 3 +
src/backend/cuda/kernel/nearest_neighbour.hpp | 9 +
src/backend/cuda/kernel/orb.hpp | 47 +-
src/backend/cuda/kernel/reduce.hpp | 21 +-
src/backend/cuda/kernel/regions.hpp | 11 +-
src/backend/cuda/kernel/select.hpp | 155 +
src/backend/cuda/kernel/shared.hpp | 4 +
src/backend/cuda/kernel/sift_nonfree.hpp | 1627 +++
src/backend/cuda/kernel/susan.hpp | 4 +-
src/backend/cuda/kernel/triangle.hpp | 8 +-
src/backend/cuda/kernel/unwrap.hpp | 103 +-
src/backend/cuda/kernel/where.hpp | 6 +-
src/backend/cuda/kernel/wrap.hpp | 113 +
src/backend/cuda/lookup.cu | 6 +
src/backend/cuda/lu.cu | 30 +
src/backend/cuda/match_template.cu | 2 +
src/backend/cuda/math.hpp | 9 +
src/backend/cuda/max.cu | 2 +
src/backend/cuda/meanshift.cu | 4 +
src/backend/cuda/medfilt.cu | 2 +
src/backend/cuda/memory.cpp | 2 +
src/backend/cuda/min.cu | 2 +
src/backend/cuda/nearest_neighbour.cu | 2 +
src/backend/cuda/platform.cpp | 40 +-
src/backend/cuda/platform.hpp | 4 +
src/backend/cuda/plot.cu | 2 +
src/backend/cuda/{plot.cu => plot3.cu} | 12 +-
src/backend/{opencl/plot.hpp => cuda/plot3.hpp} | 5 +-
src/backend/cuda/product.cu | 4 +-
src/backend/cuda/qr.cu | 29 +
src/backend/cuda/random.cu | 2 +
src/backend/cuda/range.cu | 2 +
src/backend/cuda/regions.cu | 2 +
src/backend/cuda/reorder.cu | 2 +
src/backend/cuda/resize.cu | 2 +
src/backend/cuda/rotate.cu | 2 +
src/backend/cuda/scan.cu | 2 +
src/backend/cuda/select.cu | 53 +
src/backend/cuda/{unwrap.hpp => select.hpp} | 11 +-
src/backend/cuda/set.cu | 4 +
src/backend/cuda/shift.cu | 2 +
src/backend/cuda/sift.cu | 95 +
src/backend/cuda/sift.hpp | 27 +
src/backend/cuda/sobel.cu | 2 +
src/backend/cuda/solve.cu | 31 +
src/backend/cuda/sort.cu | 4 +
.../cuda/{dilate.cu => sort_by_key/ascd_s16.cu} | 11 +-
.../cuda/{dilate.cu => sort_by_key/ascd_s64.cu} | 11 +-
.../cuda/{dilate.cu => sort_by_key/ascd_u16.cu} | 11 +-
.../cuda/{dilate.cu => sort_by_key/ascd_u64.cu} | 11 +-
.../cuda/{dilate.cu => sort_by_key/desc_s16.cu} | 11 +-
.../cuda/{dilate.cu => sort_by_key/desc_s64.cu} | 11 +-
.../cuda/{dilate.cu => sort_by_key/desc_u16.cu} | 11 +-
.../cuda/{dilate.cu => sort_by_key/desc_u64.cu} | 11 +-
src/backend/cuda/sort_by_key_impl.hpp | 6 +-
src/backend/cuda/sort_index.cu | 4 +
src/backend/cuda/sum.cu | 10 +
src/backend/cuda/{plot.cu => surface.cu} | 12 +-
src/backend/{opencl/plot.hpp => cuda/surface.hpp} | 5 +-
src/backend/cuda/susan.cu | 22 +-
src/backend/cuda/svd.cu | 183 +
src/backend/cuda/{histogram.hpp => svd.hpp} | 8 +-
src/backend/cuda/tile.cu | 2 +
src/backend/cuda/transform.cu | 2 +
src/backend/cuda/transpose.cu | 2 +
src/backend/cuda/transpose_inplace.cu | 2 +
src/backend/cuda/triangle.cu | 2 +
src/backend/cuda/types.cpp | 8 +
src/backend/cuda/types.hpp | 3 +-
src/backend/cuda/unwrap.cu | 28 +-
src/backend/cuda/unwrap.hpp | 3 +-
src/backend/cuda/where.cu | 2 +
src/backend/cuda/wrap.cu | 59 +
src/backend/cuda/{unwrap.hpp => wrap.hpp} | 11 +-
src/backend/defines.hpp | 4 +
src/backend/dim4.cpp | 64 +-
src/backend/lapacke.cpp | 168 +
src/backend/lapacke.hpp | 93 +-
src/backend/opencl/Array.cpp | 4 +-
src/backend/opencl/CMakeLists.txt | 56 +-
src/backend/opencl/all.cpp | 2 +
src/backend/opencl/any.cpp | 2 +
src/backend/opencl/assign.cpp | 6 +-
src/backend/opencl/bilateral.cpp | 2 +
src/{api/cpp/dog.cpp => backend/opencl/cache.hpp} | 23 +-
src/backend/opencl/cl.hpp | 10803 ++++++++++---------
src/backend/opencl/convolve.cpp | 4 +
src/backend/opencl/convolve_separable.cpp | 48 +-
src/backend/opencl/copy.cpp | 24 +-
src/backend/opencl/count.cpp | 2 +
src/backend/opencl/diagonal.cpp | 2 +
src/backend/opencl/diff.cpp | 2 +
src/backend/opencl/dilate.cpp | 2 +
src/backend/opencl/dilate3d.cpp | 2 +
src/backend/opencl/erode.cpp | 2 +
src/backend/opencl/erode3d.cpp | 2 +
src/backend/opencl/fast.cpp | 2 +
src/backend/opencl/fftconvolve.cpp | 4 +
src/backend/opencl/hist_graphics.cpp | 2 +
src/backend/opencl/histogram.cpp | 23 +-
src/backend/opencl/histogram.hpp | 2 +-
src/backend/opencl/homography.cpp | 96 +
src/backend/opencl/{plot.hpp => homography.hpp} | 17 +-
src/backend/opencl/identity.cpp | 2 +
src/backend/opencl/image.cpp | 6 +-
src/backend/opencl/index.cpp | 6 +-
src/backend/opencl/interopManager.cpp | 24 +
src/backend/opencl/interopManager.hpp | 2 +
src/backend/opencl/iota.cpp | 2 +
src/backend/opencl/ireduce.cpp | 4 +
src/backend/opencl/jit.cpp | 8 +-
src/backend/opencl/join.cpp | 4 +
src/backend/opencl/kernel/approx.hpp | 30 +-
src/backend/opencl/kernel/approx1.cl | 51 +-
src/backend/opencl/kernel/approx2.cl | 69 +-
src/backend/opencl/kernel/assign.cl | 5 +-
src/backend/opencl/kernel/convolve/conv1.cpp | 4 +
src/backend/opencl/kernel/convolve/conv2_impl.hpp | 117 +-
.../{plot.hpp => kernel/convolve/conv2_s16.cpp} | 16 +-
.../{plot.hpp => kernel/convolve/conv2_s64.cpp} | 16 +-
.../{plot.hpp => kernel/convolve/conv2_u16.cpp} | 16 +-
.../{plot.hpp => kernel/convolve/conv2_u64.cpp} | 16 +-
src/backend/opencl/kernel/convolve/conv3.cpp | 4 +
src/backend/opencl/kernel/convolve_separable.cpp | 135 +
src/backend/opencl/kernel/convolve_separable.hpp | 79 +-
src/backend/opencl/kernel/fast.cl | 2 +-
src/backend/opencl/kernel/fast.hpp | 100 +-
src/backend/opencl/kernel/gradient.cl | 3 +-
src/backend/opencl/kernel/gradient.hpp | 7 +-
src/backend/opencl/kernel/harris.hpp | 47 +-
src/backend/opencl/kernel/histogram.cl | 24 +-
src/backend/opencl/kernel/histogram.hpp | 15 +-
src/backend/opencl/kernel/homography.cl | 516 +
src/backend/opencl/kernel/homography.hpp | 261 +
src/backend/opencl/kernel/hsv_rgb.cl | 2 +-
src/backend/opencl/kernel/iir.cl | 4 +-
src/backend/opencl/kernel/index.cl | 5 +-
src/backend/opencl/kernel/iops.cl | 24 +-
src/backend/opencl/kernel/ireduce.hpp | 196 +-
src/backend/opencl/kernel/lookup.cl | 5 +-
src/backend/opencl/kernel/nearest_neighbour.cl | 12 +-
src/backend/opencl/kernel/nearest_neighbour.hpp | 58 +-
src/backend/opencl/kernel/orb.hpp | 13 +-
src/backend/opencl/kernel/reduce.hpp | 252 +-
src/backend/opencl/kernel/regions.cl | 6 +-
src/backend/opencl/kernel/regions.hpp | 5 +
src/backend/opencl/kernel/rotate.hpp | 5 +
src/backend/opencl/kernel/scan_dim.cl | 2 +
src/backend/opencl/kernel/scan_dim.hpp | 207 +-
src/backend/opencl/kernel/scan_first.hpp | 196 +-
src/backend/opencl/kernel/select.cl | 97 +
src/backend/opencl/kernel/select.hpp | 175 +
src/backend/opencl/kernel/set.cl | 20 -
src/backend/opencl/kernel/set.hpp | 67 -
src/backend/opencl/kernel/sift_nonfree.cl | 1020 ++
src/backend/opencl/kernel/sift_nonfree.hpp | 827 ++
src/backend/opencl/kernel/sort.hpp | 30 +-
src/backend/opencl/kernel/sort_by_key.hpp | 26 +-
src/backend/opencl/kernel/sort_index.hpp | 28 +-
src/backend/opencl/kernel/susan.cl | 5 +-
src/backend/opencl/kernel/susan.hpp | 4 +-
src/backend/opencl/kernel/transform.hpp | 7 +-
src/backend/opencl/kernel/transform_interp.cl | 4 +-
src/backend/opencl/kernel/unwrap.cl | 55 +-
src/backend/opencl/kernel/unwrap.hpp | 91 +-
src/backend/opencl/kernel/where.cl | 2 +-
src/backend/opencl/kernel/where.hpp | 7 +-
src/backend/opencl/kernel/wrap.cl | 74 +
src/backend/opencl/kernel/wrap.hpp | 112 +
src/backend/opencl/lookup.cpp | 6 +
src/backend/opencl/magma/gebrd.cpp | 368 +
src/backend/opencl/magma/geqrf2.cpp | 17 +-
src/backend/opencl/magma/geqrf3.cpp | 19 +-
src/backend/opencl/magma/getrf.cpp | 130 +-
src/backend/opencl/magma/getrs.cpp | 30 +-
src/backend/opencl/magma/labrd.cpp | 668 ++
src/backend/opencl/magma/larfb.cpp | 125 +-
src/backend/opencl/magma/magma.h | 21 +
src/backend/opencl/magma/magma_blas.h | 57 +-
src/backend/opencl/magma/magma_cpu_blas.h | 91 +
src/backend/opencl/magma/magma_cpu_lapack.h | 143 +-
src/backend/opencl/magma/magma_helper.cpp | 20 +
src/backend/opencl/magma/magma_helper.h | 3 +
src/backend/opencl/magma/potrf.cpp | 130 +-
src/backend/opencl/magma/ungqr.cpp | 11 +-
src/backend/opencl/magma/unmqr.cpp | 11 +-
src/backend/opencl/magma/unmqr2.cpp | 10 +-
src/backend/opencl/match_template.cpp | 2 +
src/backend/opencl/max.cpp | 2 +
src/backend/opencl/meanshift.cpp | 4 +
src/backend/opencl/medfilt.cpp | 2 +
src/backend/opencl/memory.cpp | 2 +
src/backend/opencl/min.cpp | 2 +
src/backend/opencl/nearest_neighbour.cpp | 63 +-
src/backend/opencl/platform.cpp | 66 +-
src/backend/opencl/platform.hpp | 5 +
src/backend/opencl/plot.cpp | 2 +
src/backend/opencl/plot.hpp | 1 -
src/backend/opencl/{plot.cpp => plot3.cpp} | 19 +-
src/backend/opencl/{plot.hpp => plot3.hpp} | 3 +-
src/backend/opencl/product.cpp | 2 +
src/backend/opencl/random.cpp | 2 +
src/backend/opencl/range.cpp | 2 +
src/backend/opencl/regions.cpp | 2 +
src/backend/opencl/reorder.cpp | 2 +
src/backend/opencl/resize.cpp | 2 +
src/backend/opencl/rotate.cpp | 2 +
src/backend/opencl/scan.cpp | 14 +-
src/backend/opencl/select.cpp | 54 +
src/backend/opencl/{plot.hpp => select.hpp} | 16 +-
src/backend/opencl/set.cpp | 48 +-
src/backend/opencl/shift.cpp | 2 +
src/backend/opencl/sift.cpp | 87 +
src/backend/opencl/sift.hpp | 27 +
src/backend/opencl/sobel.cpp | 2 +
src/backend/opencl/solve.cpp | 80 +-
src/backend/opencl/sort.cpp | 4 +
.../opencl/{plot.hpp => sort_by_key/b8.cpp} | 13 +-
.../opencl/{plot.hpp => sort_by_key/f32.cpp} | 13 +-
.../opencl/{plot.hpp => sort_by_key/f64.cpp} | 13 +-
.../{sort_by_key.cpp => sort_by_key/impl.hpp} | 38 +-
.../opencl/{plot.hpp => sort_by_key/s16.cpp} | 13 +-
.../opencl/{plot.hpp => sort_by_key/s32.cpp} | 13 +-
.../opencl/{plot.hpp => sort_by_key/s64.cpp} | 13 +-
.../opencl/{plot.hpp => sort_by_key/u16.cpp} | 13 +-
.../opencl/{plot.hpp => sort_by_key/u32.cpp} | 13 +-
.../opencl/{plot.hpp => sort_by_key/u64.cpp} | 13 +-
.../opencl/{plot.hpp => sort_by_key/u8.cpp} | 13 +-
src/backend/opencl/sort_index.cpp | 4 +
src/backend/opencl/sum.cpp | 10 +
src/backend/opencl/{plot.cpp => surface.cpp} | 16 +-
src/backend/opencl/{plot.hpp => surface.hpp} | 2 +-
src/backend/opencl/susan.cpp | 40 +-
src/backend/opencl/svd.cpp | 260 +
src/backend/opencl/{histogram.hpp => svd.hpp} | 7 +-
src/backend/opencl/tile.cpp | 2 +
src/backend/opencl/traits.hpp | 10 -
src/backend/opencl/transform.cpp | 2 +
src/backend/opencl/transpose.cpp | 2 +
src/backend/opencl/transpose_inplace.cpp | 2 +
src/backend/opencl/triangle.cpp | 2 +
src/backend/opencl/types.cpp | 2 +
src/backend/opencl/types.hpp | 1 +
src/backend/opencl/unwrap.cpp | 26 +-
src/backend/opencl/unwrap.hpp | 3 +-
src/backend/opencl/where.cpp | 2 +
src/backend/opencl/wrap.cpp | 59 +
src/backend/opencl/{plot.hpp => wrap.hpp} | 17 +-
test/CMakeLists.txt | 57 +-
test/approx1.cpp | 46 +
test/approx2.cpp | 52 +
test/array.cpp | 38 +-
test/assign.cpp | 157 +-
test/bilateral.cpp | 2 +-
test/blas.cpp | 37 +-
test/constant.cpp | 2 +-
test/convolve.cpp | 2 +-
test/corrcoef.cpp | 94 +
test/covariance.cpp | 135 +
test/data | 2 +-
test/diff1.cpp | 2 +-
test/diff2.cpp | 2 +-
test/dog.cpp | 7 +-
test/fast.cpp | 2 +-
test/fftconvolve.cpp | 2 +-
test/getting_started.cpp | 8 +-
test/gloh_nonfree.cpp | 336 +
test/hamming.cpp | 4 +-
test/histogram.cpp | 21 +-
test/homography.cpp | 277 +
test/imageio.cpp | 74 +-
test/index.cpp | 129 +-
test/info.cpp | 36 +-
test/iota.cpp | 2 +-
test/ireduce.cpp | 80 +-
test/join.cpp | 2 +-
test/match_template.cpp | 2 +-
test/mean.cpp | 206 +-
test/meanshift.cpp | 2 +-
test/medfilt.cpp | 2 +-
test/median.cpp | 27 +-
test/missing.cpp | 2 -
test/moddims.cpp | 2 +-
test/morph.cpp | 2 +-
test/nearest_neighbour.cpp | 14 +-
test/random.cpp | 2 +-
test/range.cpp | 2 +-
test/rank_dense.cpp | 30 +
test/reduce.cpp | 78 +-
test/regions.cpp | 2 +-
test/reorder.cpp | 2 +-
test/replace.cpp | 120 +
test/resize.cpp | 2 +-
test/rotate.cpp | 2 +-
test/rotate_linear.cpp | 2 +-
test/sat.cpp | 2 +-
test/scan.cpp | 4 +-
test/select.cpp | 128 +
test/set.cpp | 8 +
test/shift.cpp | 2 +-
test/sift_nonfree.cpp | 342 +
test/sobel.cpp | 2 +-
test/solve_dense.cpp | 6 +
test/sort.cpp | 2 +-
test/sort_by_key.cpp | 7 +-
test/sort_index.cpp | 7 +-
test/stdev.cpp | 207 +
test/susan.cpp | 2 +-
test/svd_dense.cpp | 99 +
test/testHelpers.hpp | 41 +-
test/tile.cpp | 2 +-
test/translate.cpp | 2 +-
test/transpose.cpp | 20 +-
test/transpose_inplace.cpp | 2 +-
test/triangle.cpp | 2 +-
test/unwrap.cpp | 40 +-
test/var.cpp | 16 +-
test/where.cpp | 2 +-
test/wrap.cpp | 179 +
test/write.cpp | 2 +-
test/ycbcr_rgb.cpp | 84 +
655 files changed, 34346 insertions(+), 9420 deletions(-)
diff --cc src/backend/cpu/Array.cpp
index d714fd9,5321137..9524a91
--- a/src/backend/cpu/Array.cpp
+++ b/src/backend/cpu/Array.cpp
@@@ -47,9 -46,10 +47,9 @@@ namespace cp
}
}
-
template<typename T>
Array<T>::Array(af::dim4 dims, TNJ::Node_ptr n) :
- info(-1, dims, af::dim4(0,0,0,0), calcStrides(dims), (af_dtype)dtype_traits<T>::af_type),
+ info(getActiveDeviceId(), dims, af::dim4(0,0,0,0), calcStrides(dims), (af_dtype)dtype_traits<T>::af_type),
data(), data_dims(dims),
node(n), offset(0), ready(false), owner(true)
{
@@@ -65,44 -65,58 +65,62 @@@
{ }
template<typename T>
- void Array<T>::eval()
+ std::shared_ptr<T> evalNodes(const int &num,
+ const dim4 &odims,
+ const dim4 &ostrs,
+ TNJ::Node_ptr &node)
{
- auto func = [this] {
- if (isReady()) return;
-
+ std::shared_ptr<T> data(memAlloc<T>(num), memFree<T>);
- T *ptr = data.get();
- setId(getActiveDeviceId());
- data = std::shared_ptr<T>(memAlloc<T>(elements()), memFree<T>);
- bool is_linear = node->isLinear(odims.get());
++ auto func = [&] {
+ T *ptr = data.get();
- dim4 ostrs = strides();
- dim4 odims = dims();
- if (is_linear) {
- for (int i = 0; i < num; i++) {
- ptr[i] = *(T *)node->calc(i);
- }
- } else {
- for (int w = 0; w < (int)odims[3]; w++) {
- dim_t offw = w * ostrs[3];
++ bool is_linear = node->isLinear(odims.get());
- for (int w = 0; w < (int)odims[3]; w++) {
- dim_t offw = w * ostrs[3];
- for (int z = 0; z < (int)odims[2]; z++) {
- dim_t offz = z * ostrs[2] + offw;
++ if (is_linear) {
++ for (int i = 0; i < num; i++) {
++ ptr[i] = *(T *)node->calc(i);
++ }
++ } else {
++ for (int w = 0; w < (int)odims[3]; w++) {
++ dim_t offw = w * ostrs[3];
- for (int z = 0; z < (int)odims[2]; z++) {
- dim_t offz = z * ostrs[2] + offw;
- for (int y = 0; y < (int)odims[1]; y++) {
- dim_t offy = y * ostrs[1] + offz;
++ for (int z = 0; z < (int)odims[2]; z++) {
++ dim_t offz = z * ostrs[2] + offw;
- for (int y = 0; y < (int)odims[1]; y++) {
- dim_t offy = y * ostrs[1] + offz;
- for (int x = 0; x < (int)odims[0]; x++) {
- dim_t id = x + offy;
++ for (int y = 0; y < (int)odims[1]; y++) {
++ dim_t offy = y * ostrs[1] + offz;
- for (int x = 0; x < (int)odims[0]; x++) {
- dim_t id = x + offy;
- ptr[id] = *(T *)node->calc(x, y, z, w);
++ for (int x = 0; x < (int)odims[0]; x++) {
++ dim_t id = x + offy;
+
- ptr[id] = *(T *)node->calc(x, y, z, w);
++ ptr[id] = *(T *)node->calc(x, y, z, w);
++ }
}
}
}
}
-
- ready = true;
- Node_ptr prev = node;
- prev->reset();
- // FIXME: Replace the current node in any JIT possible trees with the new BufferNode
- node.reset();
- }
+ };
+
+ getQueue().enqueue(func);
+
+ return data;
+ }
+
+ template<typename T>
+ void Array<T>::eval()
+ {
+ if (isReady()) return;
+
+ this->setId(getActiveDeviceId());
+
+ data = evalNodes<T>(elements(), dims(), strides(), node);
+
+ ready = true;
+ Node_ptr prev = node;
+ prev->reset();
+ // FIXME: Replace the current node in any JIT possible trees with the new BufferNode
+ node.reset();
}
template<typename T>
diff --cc src/backend/cpu/CMakeLists.txt
index 10a749b,6ab6624..57cf3df
--- a/src/backend/cpu/CMakeLists.txt
+++ b/src/backend/cpu/CMakeLists.txt
@@@ -64,10 -52,12 +64,13 @@@ INCLUDE_DIRECTORIES
"${CMAKE_SOURCE_DIR}/src/backend/cpu"
${FFTW_INCLUDES}
${CBLAS_INCLUDE_DIR}
- ${LAPACK_INCLUDE_DIR}
+ ${CMAKE_BINARY_DIR}/third_party/threads/src/threads
)
+ IF(LAPACK_FOUND)
+ INCLUDE_DIRECTORIES(${LAPACK_INCLUDE_DIR})
+ ENDIF()
+
FILE(GLOB cpu_headers
"*.hpp"
"*.h")
diff --cc src/backend/cpu/platform.cpp
index 73bd587,fc782ea..ac9f418
--- a/src/backend/cpu/platform.cpp
+++ b/src/backend/cpu/platform.cpp
@@@ -8,23 -8,180 +8,182 @@@
********************************************************/
#include <af/version.h>
+ #include <af/defines.h>
#include <platform.hpp>
#include <sstream>
+#include <async_queue.hpp>
+#include <array>
+ #include <algorithm>
+ #include <iostream>
+ #include <string>
+ #include <defines.hpp>
- namespace cpu
+ #ifdef _WIN32
+ #include <limits.h>
+ #include <intrin.h>
+ typedef unsigned __int32 uint32_t;
+ #else
+ #include <stdint.h>
+ #endif
+
+ using namespace std;
+
+ #ifdef USE_CPUID
+
+ #define MAX_INTEL_TOP_LVL 4
+
+ class CPUID {
+ uint32_t regs[4];
+
+ public:
+ explicit CPUID(unsigned funcId, unsigned subFuncId) {
+ #ifdef _WIN32
+ __cpuidex((int *)regs, (int)funcId, (int)subFuncId);
+
+ #else
+ asm volatile
+ ("cpuid" : "=a" (regs[0]), "=b" (regs[1]), "=c" (regs[2]), "=d" (regs[3])
+ : "a" (funcId), "c" (subFuncId));
+ #endif
+ }
+
+ inline const uint32_t &EAX() const { return regs[0]; }
+ inline const uint32_t &EBX() const { return regs[1]; }
+ inline const uint32_t &ECX() const { return regs[2]; }
+ inline const uint32_t &EDX() const { return regs[3]; }
+ };
+
+ #endif
+
+ class CPUInfo {
+ public:
+ CPUInfo();
+ string vendor() const { return mVendorId; }
+ string model() const { return mModelName; }
+ int threads() const { return mNumLogCpus; }
+
+ private:
+ // Bit positions for data extractions
+ static const uint32_t LVL_NUM = 0x000000FF;
+ static const uint32_t LVL_TYPE = 0x0000FF00;
+ static const uint32_t LVL_CORES = 0x0000FFFF;
+ static const uint32_t HTT_POS = 0x10000000;
+
+ // Attributes
+ string mVendorId;
+ string mModelName;
+ int mNumSMT;
+ int mNumCores;
+ int mNumLogCpus;
+ bool mIsHTT;
+ };
+
+ #ifndef USE_CPUID
+
+ CPUInfo::CPUInfo()
+ : mVendorId(""), mModelName(""), mNumSMT(0), mNumCores(0), mNumLogCpus(0), mIsHTT(false)
{
+ mVendorId = "Unknown";
+ mModelName= "Unknown";
+ mNumSMT = 1;
+ mNumCores = 1;
+ mNumLogCpus = 1;
+ }
+
+ #else
- static const char *get_system(void)
+ CPUInfo::CPUInfo()
+ : mVendorId(""), mModelName(""), mNumSMT(0), mNumCores(0), mNumLogCpus(0), mIsHTT(false)
{
- return
- #if defined(ARCH_32)
- "32-bit "
- #elif defined(ARCH_64)
- "64-bit "
+ // Get vendor name EAX=0
+ CPUID cpuID1(1, 0);
+ mIsHTT = cpuID1.EDX() & HTT_POS;
+
+ CPUID cpuID0(0, 0);
+ uint32_t HFS = cpuID0.EAX();
+ mVendorId += string((const char *)&cpuID0.EBX(), 4);
+ mVendorId += string((const char *)&cpuID0.EDX(), 4);
+ mVendorId += string((const char *)&cpuID0.ECX(), 4);
+
+ string upVId = mVendorId;
+ for_each(upVId.begin(), upVId.end(), [](char& in) { in = ::toupper(in); });
+ // Get num of cores
+ if (upVId.find("INTEL") != std::string::npos) {
+ mVendorId = "Intel";
+ if(HFS >= 11) {
+ for (int lvl=0; lvl<MAX_INTEL_TOP_LVL; ++lvl) {
+ CPUID cpuID4(0x0B, lvl);
+ uint32_t currLevel = (LVL_TYPE & cpuID4.ECX())>>8;
+ switch(currLevel) {
+ case 0x01: mNumSMT = LVL_CORES & cpuID4.EBX(); break;
+ case 0x02: mNumLogCpus = LVL_CORES & cpuID4.EBX(); break;
+ default: break;
+ }
+ }
+ mNumCores = mNumLogCpus/mNumSMT;
+ } else {
+ if (HFS>=1) {
+ mNumLogCpus = (cpuID1.EBX() >> 16) & 0xFF;
+ if (HFS>=4) {
+ mNumCores = 1 + ((CPUID(4, 0).EAX() >> 26) & 0x3F);
+ }
+ }
+ if (mIsHTT) {
+ if (!(mNumCores>1)) {
+ mNumCores = 1;
+ mNumLogCpus = (mNumLogCpus >= 2 ? mNumLogCpus : 2);
+ }
+ } else {
+ mNumCores = mNumLogCpus = 1;
+ }
+ }
+ } else if (upVId.find("AMD") != std::string::npos) {
+ mVendorId = "AMD";
+ if (HFS>=1) {
+ mNumLogCpus = (cpuID1.EBX() >> 16) & 0xFF;
+ if (CPUID(0x80000000, 0).EAX() >=8) {
+ mNumCores = 1 + ((CPUID(0x80000008, 0).ECX() & 0xFF));
+ }
+ }
+ if (mIsHTT) {
+ if (!(mNumCores>1)) {
+ mNumCores = 1;
+ mNumLogCpus = (mNumLogCpus >= 2 ? mNumLogCpus : 2);
+ }
+ } else {
+ mNumCores = mNumLogCpus = 1;
+ }
+ } else {
+ mVendorId = "Unkown, probably ARM";
+ cout<< "Unexpected vendor id" <<endl;
+ }
+ // Get processor brand string
+ // This seems to be working for both Intel & AMD vendors
+ for(unsigned i=0x80000002; i<0x80000005; ++i) {
+ CPUID cpuID(i, 0);
+ mModelName += string((const char*)&cpuID.EAX(), 4);
+ mModelName += string((const char*)&cpuID.EBX(), 4);
+ mModelName += string((const char*)&cpuID.ECX(), 4);
+ mModelName += string((const char*)&cpuID.EDX(), 4);
+ }
+ mModelName = string(mModelName.c_str());
+ }
+
#endif
+ namespace cpu
+ {
+
+ int getBackend()
+ {
+ return AF_BACKEND_CPU;
+ }
+
+ static const std::string get_system(void)
+ {
+ std::string arch = (sizeof(void *) == 4) ? "32-bit " : "64-bit ";
+
+ return arch +
#if defined(OS_LNX)
"Linux";
#elif defined(OS_WIN)
diff --cc src/backend/cpu/platform.hpp
index 2bf6bf2,2e52cd1..9abf075
--- a/src/backend/cpu/platform.hpp
+++ b/src/backend/cpu/platform.hpp
@@@ -9,9 -9,9 +9,11 @@@
#include <string>
+class async_queue;
+
namespace cpu {
+ int getBackend();
+
std::string getInfo();
bool isDoubleSupported(int device);
diff --cc src/backend/cpu/random.cpp
index 7ecf272,ab4230e..8c83ad6
--- a/src/backend/cpu/random.cpp
+++ b/src/backend/cpu/random.cpp
@@@ -179,6 -144,30 +180,29 @@@ INSTANTIATE_NORMAL(double
INSTANTIATE_NORMAL(cfloat)
INSTANTIATE_NORMAL(cdouble)
-
+ template<>
+ Array<char> randu(const af::dim4 &dims)
+ {
+ static unsigned long long my_seed = 0;
+ if (is_first) {
+ setSeed(gen_seed);
+ my_seed = gen_seed;
+ }
+
+ static auto gen = urand<float>(generator);
+
+ if (my_seed != gen_seed) {
+ gen = urand<float>(generator);
+ my_seed = gen_seed;
+ }
+
+ Array<char> outArray = createEmptyArray<char>(dims);
+ char *outPtr = outArray.get();
+ for (int i = 0; i < (int)outArray.elements(); i++) {
+ outPtr[i] = gen() > 0.5;
+ }
+ return outArray;
+ }
void setSeed(const uintl seed)
{
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits
mailing list