[arrayfire] 18/284: Merge additional changes for async CPU.

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Sun Feb 7 18:59:14 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/experimental
in repository arrayfire.

commit 4a8e7234f511973bebb02dc2eafb61bf0d561338
Merge: 1842bcf 5d428df
Author: Umar Arshad <umar at arrayfire.com>
Date:   Tue Nov 17 16:47:09 2015 -0500

    Merge additional changes for async CPU.

 ArrayFireConfig.cmake.in                           |    30 +-
 CMakeLists.txt                                     |    38 +-
 CMakeModules/FindCBLAS.cmake                       |    25 +
 CMakeModules/FindLAPACKE.cmake                     |   144 +-
 CMakeModules/FindOpenCL.cmake                      |    34 +-
 CMakeModules/Version.cmake                         |    21 +-
 CMakeModules/build_clBLAS.cmake                    |     2 +-
 CMakeModules/build_forge.cmake                     |     2 +-
 CMakeModules/osx_install/OSXInstaller.cmake        |    16 +-
 CMakeModules/osx_install/cpu_scripts/postinstall   |     6 +-
 CMakeModules/osx_install/cuda_scripts/postinstall  |     4 +-
 CMakeModules/osx_install/distribution.dist         |    35 +-
 .../osx_install/opencl_scripts/postinstall         |     4 +-
 CMakeModules/version.h.in                          |     3 +-
 COPYRIGHT.md                                       |    12 +-
 LICENSES/OpenSIFT License.txt                      |    57 +
 README.md                                          |    14 +-
 assets                                             |     2 +-
 docs/CMakeLists.txt                                |     2 +-
 docs/details/algorithm.dox                         |    38 +-
 docs/details/array.dox                             |    13 +-
 docs/details/backend.dox                           |    66 +
 docs/details/data.dox                              |    44 +-
 docs/details/image.dox                             |   136 +-
 docs/details/index.dox                             |    25 +
 docs/details/lapack.dox                            |    22 +
 docs/details/util.dox                              |   148 +
 docs/details/vision.dox                            |    76 +
 docs/doxygen.mk                                    |     3 +-
 docs/layout.xml                                    |     2 +
 docs/pages/INSTALL.md                              |   166 +-
 docs/pages/README.md                               |    40 +-
 docs/pages/getting_started.md                      |     2 +
 docs/pages/gfor.md                                 |    32 -
 docs/pages/matrix_manipulation.md                  |   298 +-
 docs/pages/release_notes.md                        |   323 +-
 docs/pages/unified_backend.md                      |   212 +
 docs/pages/using_on_linux.md                       |   200 +-
 docs/pages/using_on_osx.md                         |   233 +-
 docs/pages/using_on_windows.md                     |   304 +-
 examples/CMakeLists.txt                            |    27 +-
 examples/CMakeModules/FindOpenCL.cmake             |    34 +-
 examples/common/progress.h                         |     2 +-
 examples/financial/heston_model.cpp                |    20 +-
 examples/graphics/plot3.cpp                        |    58 +
 examples/graphics/surface.cpp                      |    55 +
 examples/image_processing/image_editing.cpp        |     2 +-
 examples/lin_algebra/svd.cpp                       |    55 +
 examples/pde/swe.cpp                               |    86 +
 examples/unified/basic.cpp                         |    78 +
 include/af/algorithm.h                             |    17 +-
 include/af/arith.h                                 |    31 +-
 include/af/array.h                                 |   117 +-
 include/af/backend.h                               |   105 +
 include/af/complex.h                               |    51 +-
 include/af/cuda.h                                  |    70 +
 include/af/data.h                                  |   145 +-
 include/af/defines.h                               |   110 +-
 include/af/device.h                                |     4 +
 include/af/dim4.hpp                                |     6 +-
 include/af/graphics.h                              |   125 +-
 include/af/image.h                                 |   428 +-
 include/af/index.h                                 |    82 +-
 include/af/lapack.h                                |    55 +
 include/af/macros.h                                |    24 +
 include/af/opencl.h                                |   426 +-
 include/af/signal.h                                |    42 +
 include/af/statistics.h                            |    28 +-
 include/af/traits.hpp                              |    24 +
 include/af/util.h                                  |   197 +-
 include/af/vision.h                                |   296 +-
 include/arrayfire.h                                |    79 +-
 src/api/c/approx.cpp                               |    30 +-
 src/api/c/assign.cpp                               |    88 +-
 src/api/c/bilateral.cpp                            |     2 +
 src/api/c/binary.cpp                               |     8 +
 src/api/c/cast.cpp                                 |     2 +
 src/api/c/colorspace.cpp                           |    60 +-
 src/api/c/convolve.cpp                             |    52 +-
 src/api/c/corrcoef.cpp                             |     2 +
 src/api/c/covariance.cpp                           |    12 +-
 src/api/c/data.cpp                                 |    53 +-
 src/api/c/device.cpp                               |    60 +-
 src/api/c/diff.cpp                                 |     4 +
 src/api/c/dog.cpp                                  |    13 +-
 src/api/c/err_common.cpp                           |     6 +-
 src/api/c/fast.cpp                                 |     2 +
 src/api/c/fftconvolve.cpp                          |     4 +
 src/api/c/filters.cpp                              |     2 +
 src/api/c/flip.cpp                                 |     2 +
 src/api/c/graphics_common.cpp                      |    60 +-
 src/api/c/graphics_common.hpp                      |    18 +-
 src/api/c/handle.hpp                               |     3 +
 src/api/c/hist.cpp                                 |     5 +-
 src/api/c/histeq.cpp                               |     4 +
 src/api/c/histogram.cpp                            |    24 +-
 src/api/c/homography.cpp                           |    88 +
 src/api/c/image.cpp                                |    21 +-
 src/api/c/imageio.cpp                              |   513 +-
 src/api/c/imageio2.cpp                             |   389 +
 src/api/c/imageio_helper.h                         |   102 +
 src/api/c/implicit.cpp                             |     6 +
 src/api/c/index.cpp                                |    41 +-
 src/api/c/join.cpp                                 |     4 +
 src/api/c/match_template.cpp                       |     2 +
 src/api/c/mean.cpp                                 |   131 +-
 src/api/c/meanshift.cpp                            |     4 +
 src/api/c/median.cpp                               |    11 +-
 src/api/c/moddims.cpp                              |     2 +
 src/api/c/morph.cpp                                |     4 +
 src/api/c/nearest_neighbour.cpp                    |    13 +-
 src/api/c/plot.cpp                                 |     5 +-
 src/api/c/plot3.cpp                                |   113 +
 src/api/c/print.cpp                                |   113 +-
 src/api/c/reduce.cpp                               |    16 +
 src/api/c/regions.cpp                              |     2 +
 src/api/c/reorder.cpp                              |     2 +
 src/api/c/replace.cpp                              |   113 +
 src/api/c/resize.cpp                               |     2 +
 src/api/c/rgb_gray.cpp                             |     2 +
 src/api/c/rotate.cpp                               |     2 +
 src/api/c/sat.cpp                                  |     2 +
 src/api/c/scan.cpp                                 |     2 +
 src/api/c/select.cpp                               |   162 +
 src/api/c/set.cpp                                  |    12 +
 src/api/c/shift.cpp                                |     2 +
 src/api/c/sift.cpp                                 |   132 +
 src/api/c/sobel.cpp                                |     2 +
 src/api/c/sort.cpp                                 |    16 +
 src/api/c/stats.h                                  |    45 +-
 src/api/c/stdev.cpp                                |    24 +-
 src/api/c/stream.cpp                               |   364 +
 src/api/c/surface.cpp                              |   135 +
 src/api/c/susan.cpp                                |     9 +-
 src/api/c/svd.cpp                                  |   128 +
 src/api/c/tile.cpp                                 |     2 +
 src/api/c/transform.cpp                            |     2 +
 src/api/c/transpose.cpp                            |    20 +-
 src/api/c/type_util.cpp                            |    10 +-
 src/api/c/unary.cpp                                |    89 +-
 src/api/c/unwrap.cpp                               |    33 +-
 src/api/c/util.cpp                                 |    81 +
 src/api/c/var.cpp                                  |    18 +-
 src/api/c/where.cpp                                |     2 +
 src/api/c/wrap.cpp                                 |    80 +
 src/api/c/ycbcr_rgb.cpp                            |   160 +
 src/api/cpp/array.cpp                              |    67 +-
 src/api/cpp/corrcoef.cpp                           |     4 +
 src/api/cpp/data.cpp                               |    34 +
 src/api/cpp/device.cpp                             |    32 +
 src/api/cpp/dog.cpp                                |     2 +-
 src/api/cpp/features.cpp                           |     1 -
 src/api/cpp/graphics.cpp                           |    21 +
 src/api/cpp/homography.cpp                         |    32 +
 src/api/cpp/imageio.cpp                            |    31 +
 src/api/cpp/index.cpp                              |     2 +-
 src/api/cpp/lapack.cpp                             |    18 +
 src/api/cpp/mean.cpp                               |     4 +
 src/api/cpp/median.cpp                             |     6 +-
 src/api/cpp/reduce.cpp                             |     6 +
 src/api/cpp/seq.cpp                                |    10 +-
 src/api/cpp/sift.cpp                               |    51 +
 src/api/cpp/stdev.cpp                              |     4 +
 src/api/cpp/timing.cpp                             |     8 +-
 src/api/cpp/unwrap.cpp                             |     5 +-
 src/api/cpp/util.cpp                               |    41 +-
 src/api/cpp/var.cpp                                |     2 +
 src/api/cpp/{unwrap.cpp => wrap.cpp}               |    13 +-
 src/api/cpp/{dog.cpp => ycbcr_rgb.cpp}             |    15 +-
 src/api/unified/CMakeLists.txt                     |    72 +
 src/api/unified/algorithm.cpp                      |   148 +
 src/api/unified/arith.cpp                          |   102 +
 src/api/unified/array.cpp                          |   108 +
 src/api/unified/blas.cpp                           |    40 +
 src/api/unified/data.cpp                           |   180 +
 src/api/unified/device.cpp                         |   140 +
 src/api/unified/features.cpp                       |    44 +
 src/api/unified/graphics.cpp                       |    83 +
 src/api/unified/image.cpp                          |   252 +
 src/api/unified/index.cpp                          |    54 +
 src/api/unified/lapack.cpp                         |    98 +
 src/api/unified/signal.cpp                         |   143 +
 src/api/unified/statistics.cpp                     |    96 +
 src/api/unified/symbol_manager.cpp                 |   223 +
 src/api/unified/symbol_manager.hpp                 |   108 +
 src/api/unified/util.cpp                           |    63 +
 src/api/unified/vision.cpp                         |    86 +
 src/backend/ArrayInfo.cpp                          |    82 +
 src/backend/ArrayInfo.hpp                          |    37 +-
 src/backend/cblas.cpp                              |   105 +-
 src/backend/cpu/Array.cpp                          |    63 +-
 src/backend/cpu/CMakeLists.txt                     |     5 +-
 src/backend/cpu/TNJ/BinaryNode.hpp                 |    19 +-
 src/backend/cpu/TNJ/BufferNode.hpp                 |    45 +-
 src/backend/cpu/TNJ/Node.hpp                       |    10 +-
 src/backend/cpu/TNJ/ScalarNode.hpp                 |     9 +-
 src/backend/cpu/TNJ/UnaryNode.hpp                  |    15 +-
 src/backend/cpu/approx.cpp                         |   206 +-
 src/backend/cpu/assign.cpp                         |     2 +
 src/backend/cpu/bilateral.cpp                      |     2 +
 src/backend/cpu/blas.cpp                           |    37 +-
 src/backend/cpu/convolve.cpp                       |     4 +
 src/backend/cpu/copy.cpp                           |    54 +-
 src/backend/cpu/diagonal.cpp                       |     2 +
 src/backend/cpu/diff.cpp                           |     2 +
 src/backend/cpu/fast.cpp                           |     2 +
 src/backend/cpu/fftconvolve.cpp                    |     4 +
 src/backend/cpu/hist_graphics.cpp                  |     2 +
 src/backend/cpu/histogram.cpp                      |    12 +-
 src/backend/cpu/histogram.hpp                      |     2 +-
 src/backend/cpu/homography.cpp                     |   383 +
 src/backend/cpu/{histogram.hpp => homography.hpp}  |    10 +-
 src/backend/cpu/identity.cpp                       |     4 +-
 src/backend/cpu/image.cpp                          |     2 +
 src/backend/cpu/index.cpp                          |     2 +
 src/backend/cpu/iota.cpp                           |     2 +
 src/backend/cpu/ireduce.cpp                        |     4 +
 src/backend/cpu/join.cpp                           |     4 +
 src/backend/cpu/lookup.cpp                         |     6 +
 src/backend/cpu/match_template.cpp                 |     2 +
 src/backend/cpu/math.cpp                           |     3 -
 src/backend/cpu/math.hpp                           |     3 -
 src/backend/cpu/meanshift.cpp                      |     4 +
 src/backend/cpu/medfilt.cpp                        |     2 +
 src/backend/cpu/memory.cpp                         |     2 +
 src/backend/cpu/morph.cpp                          |     2 +
 src/backend/cpu/nearest_neighbour.cpp              |    13 +-
 src/backend/cpu/platform.cpp                       |   192 +-
 src/backend/cpu/platform.hpp                       |     2 +
 src/backend/cpu/plot.cpp                           |     2 +
 src/backend/cpu/{plot.cpp => plot3.cpp}            |    12 +-
 src/backend/{opencl/plot.hpp => cpu/plot3.hpp}     |     5 +-
 src/backend/cpu/random.cpp                         |    26 +-
 src/backend/cpu/range.cpp                          |     2 +
 src/backend/cpu/reduce.cpp                         |    38 +-
 src/backend/cpu/regions.cpp                        |     2 +
 src/backend/cpu/reorder.cpp                        |     2 +
 src/backend/cpu/resize.cpp                         |     2 +
 src/backend/cpu/rotate.cpp                         |     2 +
 src/backend/cpu/scan.cpp                           |     2 +
 src/backend/cpu/select.cpp                         |   145 +
 src/backend/cpu/{unwrap.hpp => select.hpp}         |    11 +-
 src/backend/cpu/set.cpp                            |     4 +
 src/backend/cpu/shift.cpp                          |     2 +
 src/backend/cpu/sift.cpp                           |    67 +
 src/backend/cpu/sift.hpp                           |    27 +
 src/backend/cpu/sift_nonfree.hpp                   |  1193 ++
 src/backend/cpu/sobel.cpp                          |     2 +
 src/backend/cpu/sort.cpp                           |     4 +
 src/backend/cpu/sort_by_key.cpp                    |     9 +
 src/backend/cpu/sort_index.cpp                     |     4 +
 src/backend/cpu/{plot.cpp => surface.cpp}          |    12 +-
 src/backend/{opencl/plot.hpp => cpu/surface.hpp}   |     5 +-
 src/backend/cpu/susan.cpp                          |    21 +-
 src/backend/cpu/svd.cpp                            |   121 +
 src/backend/cpu/{histogram.hpp => svd.hpp}         |     8 +-
 src/backend/cpu/tile.cpp                           |     2 +
 src/backend/cpu/transform.cpp                      |     2 +
 src/backend/cpu/transpose.cpp                      |     2 +
 src/backend/cpu/triangle.cpp                       |     2 +
 src/backend/cpu/types.hpp                          |     1 +
 src/backend/cpu/unwrap.cpp                         |    46 +-
 src/backend/cpu/unwrap.hpp                         |     3 +-
 src/backend/cpu/where.cpp                          |     2 +
 src/backend/cpu/wrap.cpp                           |   124 +
 src/backend/cpu/{unwrap.hpp => wrap.hpp}           |    11 +-
 src/backend/cuda/Array.cpp                         |    14 +-
 src/backend/cuda/CMakeLists.txt                    |    47 +-
 src/backend/cuda/JIT/arith.cu                      |     2 +
 src/backend/cuda/JIT/cast.cu                       |    20 +-
 src/backend/cuda/JIT/exp.cu                        |     4 +
 src/backend/cuda/JIT/hyper.cu                      |     2 +
 src/backend/cuda/JIT/logic.cu                      |     8 +
 src/backend/cuda/JIT/numeric.cu                    |    42 +-
 src/backend/cuda/JIT/trig.cu                       |     4 +
 src/backend/cuda/JIT/types.h                       |     1 +
 src/backend/cuda/all.cu                            |     2 +
 src/backend/cuda/any.cu                            |     2 +
 src/backend/cuda/assign.cu                         |     8 +-
 src/backend/cuda/bilateral.cu                      |     2 +
 src/backend/cuda/blas.cpp                          |     4 +-
 src/backend/cuda/cholesky.cu                       |    28 +
 src/backend/cuda/convolve.cpp                      |     4 +
 src/backend/cuda/copy.cu                           |    38 +-
 src/backend/cuda/count.cu                          |     2 +
 src/backend/cuda/cpu_lapack/cpu_cholesky.cpp       |   109 +
 .../{unwrap.hpp => cpu_lapack/cpu_cholesky.hpp}    |    10 +-
 src/backend/cuda/cpu_lapack/cpu_inverse.cpp        |    92 +
 .../plot.hpp => cuda/cpu_lapack/cpu_inverse.hpp}   |    14 +-
 src/backend/cuda/cpu_lapack/cpu_lu.cpp             |   197 +
 .../cuda/{unwrap.hpp => cpu_lapack/cpu_lu.hpp}     |    10 +-
 src/backend/cuda/cpu_lapack/cpu_qr.cpp             |   160 +
 .../cuda/{unwrap.hpp => cpu_lapack/cpu_qr.hpp}     |    10 +-
 src/backend/cuda/cpu_lapack/cpu_solve.cpp          |   206 +
 .../cuda/{unwrap.hpp => cpu_lapack/cpu_solve.hpp}  |    11 +-
 src/backend/cuda/cpu_lapack/cpu_svd.cpp            |   153 +
 .../cuda/{histogram.hpp => cpu_lapack/cpu_svd.hpp} |    10 +-
 src/backend/cuda/cpu_lapack/cpu_triangle.hpp       |    52 +
 src/backend/cuda/cpu_lapack/lapack_helper.hpp      |    35 +
 src/backend/cuda/diagonal.cu                       |     2 +
 src/backend/cuda/diff.cu                           |     2 +
 src/backend/cuda/dilate.cu                         |     2 +
 src/backend/cuda/dilate3d.cu                       |     2 +
 src/backend/cuda/driver.cpp                        |     2 +-
 src/backend/cuda/erode.cu                          |     2 +
 src/backend/cuda/erode3d.cu                        |     2 +
 src/backend/cuda/fast.cu                           |     2 +
 src/backend/cuda/fast_pyramid.cu                   |     2 +
 src/backend/cuda/fftconvolve.cu                    |     4 +
 src/backend/cuda/hist_graphics.cu                  |     2 +
 src/backend/cuda/histogram.cu                      |    23 +-
 src/backend/cuda/histogram.hpp                     |     2 +-
 src/backend/cuda/homography.cu                     |    79 +
 src/backend/cuda/{histogram.hpp => homography.hpp} |    10 +-
 src/backend/cuda/identity.cu                       |     2 +
 src/backend/cuda/image.cu                          |     2 +
 src/backend/cuda/index.cu                          |     6 +-
 src/backend/cuda/interopManager.cu                 |    34 +
 src/backend/cuda/interopManager.hpp                |     2 +
 src/backend/cuda/inverse.cu                        |    22 +
 src/backend/cuda/iota.cu                           |     2 +
 src/backend/cuda/ireduce.cu                        |     4 +
 src/backend/cuda/jit.cpp                           |     2 +
 src/backend/cuda/join.cu                           |    22 +-
 src/backend/cuda/kernel/approx.hpp                 |   121 +-
 src/backend/cuda/kernel/atomics.hpp                |    59 +
 src/backend/cuda/kernel/convolve.cu                |    16 +-
 src/backend/cuda/kernel/convolve_separable.cu      |    12 +-
 src/backend/cuda/kernel/fast.hpp                   |    24 +-
 src/backend/cuda/kernel/fftconvolve.hpp            |    18 +-
 src/backend/cuda/kernel/harris.hpp                 |     7 +-
 src/backend/cuda/kernel/histogram.hpp              |    33 +-
 src/backend/cuda/kernel/homography.hpp             |   699 ++
 src/backend/cuda/kernel/ireduce.hpp                |    24 +-
 src/backend/cuda/kernel/memcopy.hpp                |     3 +
 src/backend/cuda/kernel/nearest_neighbour.hpp      |     9 +
 src/backend/cuda/kernel/orb.hpp                    |    47 +-
 src/backend/cuda/kernel/reduce.hpp                 |    21 +-
 src/backend/cuda/kernel/regions.hpp                |    11 +-
 src/backend/cuda/kernel/select.hpp                 |   155 +
 src/backend/cuda/kernel/shared.hpp                 |     4 +
 src/backend/cuda/kernel/sift_nonfree.hpp           |  1627 +++
 src/backend/cuda/kernel/susan.hpp                  |     4 +-
 src/backend/cuda/kernel/triangle.hpp               |     8 +-
 src/backend/cuda/kernel/unwrap.hpp                 |   103 +-
 src/backend/cuda/kernel/where.hpp                  |     6 +-
 src/backend/cuda/kernel/wrap.hpp                   |   113 +
 src/backend/cuda/lookup.cu                         |     6 +
 src/backend/cuda/lu.cu                             |    30 +
 src/backend/cuda/match_template.cu                 |     2 +
 src/backend/cuda/math.hpp                          |     9 +
 src/backend/cuda/max.cu                            |     2 +
 src/backend/cuda/meanshift.cu                      |     4 +
 src/backend/cuda/medfilt.cu                        |     2 +
 src/backend/cuda/memory.cpp                        |     2 +
 src/backend/cuda/min.cu                            |     2 +
 src/backend/cuda/nearest_neighbour.cu              |     2 +
 src/backend/cuda/platform.cpp                      |    40 +-
 src/backend/cuda/platform.hpp                      |     4 +
 src/backend/cuda/plot.cu                           |     2 +
 src/backend/cuda/{plot.cu => plot3.cu}             |    12 +-
 src/backend/{opencl/plot.hpp => cuda/plot3.hpp}    |     5 +-
 src/backend/cuda/product.cu                        |     4 +-
 src/backend/cuda/qr.cu                             |    29 +
 src/backend/cuda/random.cu                         |     2 +
 src/backend/cuda/range.cu                          |     2 +
 src/backend/cuda/regions.cu                        |     2 +
 src/backend/cuda/reorder.cu                        |     2 +
 src/backend/cuda/resize.cu                         |     2 +
 src/backend/cuda/rotate.cu                         |     2 +
 src/backend/cuda/scan.cu                           |     2 +
 src/backend/cuda/select.cu                         |    53 +
 src/backend/cuda/{unwrap.hpp => select.hpp}        |    11 +-
 src/backend/cuda/set.cu                            |     4 +
 src/backend/cuda/shift.cu                          |     2 +
 src/backend/cuda/sift.cu                           |    95 +
 src/backend/cuda/sift.hpp                          |    27 +
 src/backend/cuda/sobel.cu                          |     2 +
 src/backend/cuda/solve.cu                          |    31 +
 src/backend/cuda/sort.cu                           |     4 +
 .../cuda/{dilate.cu => sort_by_key/ascd_s16.cu}    |    11 +-
 .../cuda/{dilate.cu => sort_by_key/ascd_s64.cu}    |    11 +-
 .../cuda/{dilate.cu => sort_by_key/ascd_u16.cu}    |    11 +-
 .../cuda/{dilate.cu => sort_by_key/ascd_u64.cu}    |    11 +-
 .../cuda/{dilate.cu => sort_by_key/desc_s16.cu}    |    11 +-
 .../cuda/{dilate.cu => sort_by_key/desc_s64.cu}    |    11 +-
 .../cuda/{dilate.cu => sort_by_key/desc_u16.cu}    |    11 +-
 .../cuda/{dilate.cu => sort_by_key/desc_u64.cu}    |    11 +-
 src/backend/cuda/sort_by_key_impl.hpp              |     6 +-
 src/backend/cuda/sort_index.cu                     |     4 +
 src/backend/cuda/sum.cu                            |    10 +
 src/backend/cuda/{plot.cu => surface.cu}           |    12 +-
 src/backend/{opencl/plot.hpp => cuda/surface.hpp}  |     5 +-
 src/backend/cuda/susan.cu                          |    22 +-
 src/backend/cuda/svd.cu                            |   183 +
 src/backend/cuda/{histogram.hpp => svd.hpp}        |     8 +-
 src/backend/cuda/tile.cu                           |     2 +
 src/backend/cuda/transform.cu                      |     2 +
 src/backend/cuda/transpose.cu                      |     2 +
 src/backend/cuda/transpose_inplace.cu              |     2 +
 src/backend/cuda/triangle.cu                       |     2 +
 src/backend/cuda/types.cpp                         |     8 +
 src/backend/cuda/types.hpp                         |     3 +-
 src/backend/cuda/unwrap.cu                         |    28 +-
 src/backend/cuda/unwrap.hpp                        |     3 +-
 src/backend/cuda/where.cu                          |     2 +
 src/backend/cuda/wrap.cu                           |    59 +
 src/backend/cuda/{unwrap.hpp => wrap.hpp}          |    11 +-
 src/backend/defines.hpp                            |     4 +
 src/backend/dim4.cpp                               |    64 +-
 src/backend/lapacke.cpp                            |   168 +
 src/backend/lapacke.hpp                            |    93 +-
 src/backend/opencl/Array.cpp                       |     4 +-
 src/backend/opencl/CMakeLists.txt                  |    56 +-
 src/backend/opencl/all.cpp                         |     2 +
 src/backend/opencl/any.cpp                         |     2 +
 src/backend/opencl/assign.cpp                      |     6 +-
 src/backend/opencl/bilateral.cpp                   |     2 +
 src/{api/cpp/dog.cpp => backend/opencl/cache.hpp}  |    23 +-
 src/backend/opencl/cl.hpp                          | 10803 ++++++++++---------
 src/backend/opencl/convolve.cpp                    |     4 +
 src/backend/opencl/convolve_separable.cpp          |    48 +-
 src/backend/opencl/copy.cpp                        |    24 +-
 src/backend/opencl/count.cpp                       |     2 +
 src/backend/opencl/diagonal.cpp                    |     2 +
 src/backend/opencl/diff.cpp                        |     2 +
 src/backend/opencl/dilate.cpp                      |     2 +
 src/backend/opencl/dilate3d.cpp                    |     2 +
 src/backend/opencl/erode.cpp                       |     2 +
 src/backend/opencl/erode3d.cpp                     |     2 +
 src/backend/opencl/fast.cpp                        |     2 +
 src/backend/opencl/fftconvolve.cpp                 |     4 +
 src/backend/opencl/hist_graphics.cpp               |     2 +
 src/backend/opencl/histogram.cpp                   |    23 +-
 src/backend/opencl/histogram.hpp                   |     2 +-
 src/backend/opencl/homography.cpp                  |    96 +
 src/backend/opencl/{plot.hpp => homography.hpp}    |    17 +-
 src/backend/opencl/identity.cpp                    |     2 +
 src/backend/opencl/image.cpp                       |     6 +-
 src/backend/opencl/index.cpp                       |     6 +-
 src/backend/opencl/interopManager.cpp              |    24 +
 src/backend/opencl/interopManager.hpp              |     2 +
 src/backend/opencl/iota.cpp                        |     2 +
 src/backend/opencl/ireduce.cpp                     |     4 +
 src/backend/opencl/jit.cpp                         |     8 +-
 src/backend/opencl/join.cpp                        |     4 +
 src/backend/opencl/kernel/approx.hpp               |    30 +-
 src/backend/opencl/kernel/approx1.cl               |    51 +-
 src/backend/opencl/kernel/approx2.cl               |    69 +-
 src/backend/opencl/kernel/assign.cl                |     5 +-
 src/backend/opencl/kernel/convolve/conv1.cpp       |     4 +
 src/backend/opencl/kernel/convolve/conv2_impl.hpp  |   117 +-
 .../{plot.hpp => kernel/convolve/conv2_s16.cpp}    |    16 +-
 .../{plot.hpp => kernel/convolve/conv2_s64.cpp}    |    16 +-
 .../{plot.hpp => kernel/convolve/conv2_u16.cpp}    |    16 +-
 .../{plot.hpp => kernel/convolve/conv2_u64.cpp}    |    16 +-
 src/backend/opencl/kernel/convolve/conv3.cpp       |     4 +
 src/backend/opencl/kernel/convolve_separable.cpp   |   135 +
 src/backend/opencl/kernel/convolve_separable.hpp   |    79 +-
 src/backend/opencl/kernel/fast.cl                  |     2 +-
 src/backend/opencl/kernel/fast.hpp                 |   100 +-
 src/backend/opencl/kernel/gradient.cl              |     3 +-
 src/backend/opencl/kernel/gradient.hpp             |     7 +-
 src/backend/opencl/kernel/harris.hpp               |    47 +-
 src/backend/opencl/kernel/histogram.cl             |    24 +-
 src/backend/opencl/kernel/histogram.hpp            |    15 +-
 src/backend/opencl/kernel/homography.cl            |   516 +
 src/backend/opencl/kernel/homography.hpp           |   261 +
 src/backend/opencl/kernel/hsv_rgb.cl               |     2 +-
 src/backend/opencl/kernel/iir.cl                   |     4 +-
 src/backend/opencl/kernel/index.cl                 |     5 +-
 src/backend/opencl/kernel/iops.cl                  |    24 +-
 src/backend/opencl/kernel/ireduce.hpp              |   196 +-
 src/backend/opencl/kernel/lookup.cl                |     5 +-
 src/backend/opencl/kernel/nearest_neighbour.cl     |    12 +-
 src/backend/opencl/kernel/nearest_neighbour.hpp    |    58 +-
 src/backend/opencl/kernel/orb.hpp                  |    13 +-
 src/backend/opencl/kernel/reduce.hpp               |   252 +-
 src/backend/opencl/kernel/regions.cl               |     6 +-
 src/backend/opencl/kernel/regions.hpp              |     5 +
 src/backend/opencl/kernel/rotate.hpp               |     5 +
 src/backend/opencl/kernel/scan_dim.cl              |     2 +
 src/backend/opencl/kernel/scan_dim.hpp             |   207 +-
 src/backend/opencl/kernel/scan_first.hpp           |   196 +-
 src/backend/opencl/kernel/select.cl                |    97 +
 src/backend/opencl/kernel/select.hpp               |   175 +
 src/backend/opencl/kernel/set.cl                   |    20 -
 src/backend/opencl/kernel/set.hpp                  |    67 -
 src/backend/opencl/kernel/sift_nonfree.cl          |  1020 ++
 src/backend/opencl/kernel/sift_nonfree.hpp         |   827 ++
 src/backend/opencl/kernel/sort.hpp                 |    30 +-
 src/backend/opencl/kernel/sort_by_key.hpp          |    26 +-
 src/backend/opencl/kernel/sort_index.hpp           |    28 +-
 src/backend/opencl/kernel/susan.cl                 |     5 +-
 src/backend/opencl/kernel/susan.hpp                |     4 +-
 src/backend/opencl/kernel/transform.hpp            |     7 +-
 src/backend/opencl/kernel/transform_interp.cl      |     4 +-
 src/backend/opencl/kernel/unwrap.cl                |    55 +-
 src/backend/opencl/kernel/unwrap.hpp               |    91 +-
 src/backend/opencl/kernel/where.cl                 |     2 +-
 src/backend/opencl/kernel/where.hpp                |     7 +-
 src/backend/opencl/kernel/wrap.cl                  |    74 +
 src/backend/opencl/kernel/wrap.hpp                 |   112 +
 src/backend/opencl/lookup.cpp                      |     6 +
 src/backend/opencl/magma/gebrd.cpp                 |   368 +
 src/backend/opencl/magma/geqrf2.cpp                |    17 +-
 src/backend/opencl/magma/geqrf3.cpp                |    19 +-
 src/backend/opencl/magma/getrf.cpp                 |   130 +-
 src/backend/opencl/magma/getrs.cpp                 |    30 +-
 src/backend/opencl/magma/labrd.cpp                 |   668 ++
 src/backend/opencl/magma/larfb.cpp                 |   125 +-
 src/backend/opencl/magma/magma.h                   |    21 +
 src/backend/opencl/magma/magma_blas.h              |    57 +-
 src/backend/opencl/magma/magma_cpu_blas.h          |    91 +
 src/backend/opencl/magma/magma_cpu_lapack.h        |   143 +-
 src/backend/opencl/magma/magma_helper.cpp          |    20 +
 src/backend/opencl/magma/magma_helper.h            |     3 +
 src/backend/opencl/magma/potrf.cpp                 |   130 +-
 src/backend/opencl/magma/ungqr.cpp                 |    11 +-
 src/backend/opencl/magma/unmqr.cpp                 |    11 +-
 src/backend/opencl/magma/unmqr2.cpp                |    10 +-
 src/backend/opencl/match_template.cpp              |     2 +
 src/backend/opencl/max.cpp                         |     2 +
 src/backend/opencl/meanshift.cpp                   |     4 +
 src/backend/opencl/medfilt.cpp                     |     2 +
 src/backend/opencl/memory.cpp                      |     2 +
 src/backend/opencl/min.cpp                         |     2 +
 src/backend/opencl/nearest_neighbour.cpp           |    63 +-
 src/backend/opencl/platform.cpp                    |    66 +-
 src/backend/opencl/platform.hpp                    |     5 +
 src/backend/opencl/plot.cpp                        |     2 +
 src/backend/opencl/plot.hpp                        |     1 -
 src/backend/opencl/{plot.cpp => plot3.cpp}         |    19 +-
 src/backend/opencl/{plot.hpp => plot3.hpp}         |     3 +-
 src/backend/opencl/product.cpp                     |     2 +
 src/backend/opencl/random.cpp                      |     2 +
 src/backend/opencl/range.cpp                       |     2 +
 src/backend/opencl/regions.cpp                     |     2 +
 src/backend/opencl/reorder.cpp                     |     2 +
 src/backend/opencl/resize.cpp                      |     2 +
 src/backend/opencl/rotate.cpp                      |     2 +
 src/backend/opencl/scan.cpp                        |    14 +-
 src/backend/opencl/select.cpp                      |    54 +
 src/backend/opencl/{plot.hpp => select.hpp}        |    16 +-
 src/backend/opencl/set.cpp                         |    48 +-
 src/backend/opencl/shift.cpp                       |     2 +
 src/backend/opencl/sift.cpp                        |    87 +
 src/backend/opencl/sift.hpp                        |    27 +
 src/backend/opencl/sobel.cpp                       |     2 +
 src/backend/opencl/solve.cpp                       |    80 +-
 src/backend/opencl/sort.cpp                        |     4 +
 .../opencl/{plot.hpp => sort_by_key/b8.cpp}        |    13 +-
 .../opencl/{plot.hpp => sort_by_key/f32.cpp}       |    13 +-
 .../opencl/{plot.hpp => sort_by_key/f64.cpp}       |    13 +-
 .../{sort_by_key.cpp => sort_by_key/impl.hpp}      |    38 +-
 .../opencl/{plot.hpp => sort_by_key/s16.cpp}       |    13 +-
 .../opencl/{plot.hpp => sort_by_key/s32.cpp}       |    13 +-
 .../opencl/{plot.hpp => sort_by_key/s64.cpp}       |    13 +-
 .../opencl/{plot.hpp => sort_by_key/u16.cpp}       |    13 +-
 .../opencl/{plot.hpp => sort_by_key/u32.cpp}       |    13 +-
 .../opencl/{plot.hpp => sort_by_key/u64.cpp}       |    13 +-
 .../opencl/{plot.hpp => sort_by_key/u8.cpp}        |    13 +-
 src/backend/opencl/sort_index.cpp                  |     4 +
 src/backend/opencl/sum.cpp                         |    10 +
 src/backend/opencl/{plot.cpp => surface.cpp}       |    16 +-
 src/backend/opencl/{plot.hpp => surface.hpp}       |     2 +-
 src/backend/opencl/susan.cpp                       |    40 +-
 src/backend/opencl/svd.cpp                         |   260 +
 src/backend/opencl/{histogram.hpp => svd.hpp}      |     7 +-
 src/backend/opencl/tile.cpp                        |     2 +
 src/backend/opencl/traits.hpp                      |    10 -
 src/backend/opencl/transform.cpp                   |     2 +
 src/backend/opencl/transpose.cpp                   |     2 +
 src/backend/opencl/transpose_inplace.cpp           |     2 +
 src/backend/opencl/triangle.cpp                    |     2 +
 src/backend/opencl/types.cpp                       |     2 +
 src/backend/opencl/types.hpp                       |     1 +
 src/backend/opencl/unwrap.cpp                      |    26 +-
 src/backend/opencl/unwrap.hpp                      |     3 +-
 src/backend/opencl/where.cpp                       |     2 +
 src/backend/opencl/wrap.cpp                        |    59 +
 src/backend/opencl/{plot.hpp => wrap.hpp}          |    17 +-
 test/CMakeLists.txt                                |    57 +-
 test/approx1.cpp                                   |    46 +
 test/approx2.cpp                                   |    52 +
 test/array.cpp                                     |    38 +-
 test/assign.cpp                                    |   157 +-
 test/bilateral.cpp                                 |     2 +-
 test/blas.cpp                                      |    37 +-
 test/constant.cpp                                  |     2 +-
 test/convolve.cpp                                  |     2 +-
 test/corrcoef.cpp                                  |    94 +
 test/covariance.cpp                                |   135 +
 test/data                                          |     2 +-
 test/diff1.cpp                                     |     2 +-
 test/diff2.cpp                                     |     2 +-
 test/dog.cpp                                       |     7 +-
 test/fast.cpp                                      |     2 +-
 test/fftconvolve.cpp                               |     2 +-
 test/getting_started.cpp                           |     8 +-
 test/gloh_nonfree.cpp                              |   336 +
 test/hamming.cpp                                   |     4 +-
 test/histogram.cpp                                 |    21 +-
 test/homography.cpp                                |   277 +
 test/imageio.cpp                                   |    74 +-
 test/index.cpp                                     |   129 +-
 test/info.cpp                                      |    36 +-
 test/iota.cpp                                      |     2 +-
 test/ireduce.cpp                                   |    80 +-
 test/join.cpp                                      |     2 +-
 test/match_template.cpp                            |     2 +-
 test/mean.cpp                                      |   206 +-
 test/meanshift.cpp                                 |     2 +-
 test/medfilt.cpp                                   |     2 +-
 test/median.cpp                                    |    27 +-
 test/missing.cpp                                   |     2 -
 test/moddims.cpp                                   |     2 +-
 test/morph.cpp                                     |     2 +-
 test/nearest_neighbour.cpp                         |    14 +-
 test/random.cpp                                    |     2 +-
 test/range.cpp                                     |     2 +-
 test/rank_dense.cpp                                |    30 +
 test/reduce.cpp                                    |    78 +-
 test/regions.cpp                                   |     2 +-
 test/reorder.cpp                                   |     2 +-
 test/replace.cpp                                   |   120 +
 test/resize.cpp                                    |     2 +-
 test/rotate.cpp                                    |     2 +-
 test/rotate_linear.cpp                             |     2 +-
 test/sat.cpp                                       |     2 +-
 test/scan.cpp                                      |     4 +-
 test/select.cpp                                    |   128 +
 test/set.cpp                                       |     8 +
 test/shift.cpp                                     |     2 +-
 test/sift_nonfree.cpp                              |   342 +
 test/sobel.cpp                                     |     2 +-
 test/solve_dense.cpp                               |     6 +
 test/sort.cpp                                      |     2 +-
 test/sort_by_key.cpp                               |     7 +-
 test/sort_index.cpp                                |     7 +-
 test/stdev.cpp                                     |   207 +
 test/susan.cpp                                     |     2 +-
 test/svd_dense.cpp                                 |    99 +
 test/testHelpers.hpp                               |    41 +-
 test/tile.cpp                                      |     2 +-
 test/translate.cpp                                 |     2 +-
 test/transpose.cpp                                 |    20 +-
 test/transpose_inplace.cpp                         |     2 +-
 test/triangle.cpp                                  |     2 +-
 test/unwrap.cpp                                    |    40 +-
 test/var.cpp                                       |    16 +-
 test/where.cpp                                     |     2 +-
 test/wrap.cpp                                      |   179 +
 test/write.cpp                                     |     2 +-
 test/ycbcr_rgb.cpp                                 |    84 +
 655 files changed, 34340 insertions(+), 9418 deletions(-)

diff --cc src/backend/cpu/Array.cpp
index 64aacf5,9524a91..8ea6104
--- a/src/backend/cpu/Array.cpp
+++ b/src/backend/cpu/Array.cpp
@@@ -64,46 -64,63 +64,60 @@@ namespace cp
          ready(true), owner(false)
      { }
  
++
      template<typename T>
 -    std::shared_ptr<T> evalNodes(const int &num,
 -                                 const dim4 &odims,
 -                                 const dim4 &ostrs,
 -                                 TNJ::Node_ptr &node)
 +    void Array<T>::eval()
      {
 -        std::shared_ptr<T> data(memAlloc<T>(num), memFree<T>);
 +        if (isReady()) return;
 +
++        this->setId(getActiveDeviceId());
+ 
 -        auto func = [&] {
 -            T *ptr = data.get();
++        if (isReady()) return;
++
 +        data = std::shared_ptr<T>(memAlloc<T>(elements()), memFree<T>);
 +
-         auto func = [this] {
-             setId(getActiveDeviceId());
-             T *ptr = data.get();
++        auto func = [] (Array<T> in) {
++            in.setId(getActiveDeviceId());
++            T *ptr = in.data.get();
  
-             dim4 ostrs = strides();
-             dim4 odims = dims();
 -            bool is_linear = node->isLinear(odims.get());
++            dim4 odims = in.dims();
++            dim4 ostrs = in.strides();
 +
-             for (int w = 0; w < (int)odims[3]; w++) {
-                 dim_t offw = w * ostrs[3];
++            bool is_linear = in.node->isLinear(odims.get());
  
-                 for (int z = 0; z < (int)odims[2]; z++) {
-                     dim_t offz = z * ostrs[2] + offw;
+             if (is_linear) {
++                int num = in.elements();
+                 for (int i = 0; i < num; i++) {
 -                    ptr[i] = *(T *)node->calc(i);
++                    ptr[i] = *(T *)in.node->calc(i);
+                 }
+             } else {
+                 for (int w = 0; w < (int)odims[3]; w++) {
+                     dim_t offw = w * ostrs[3];
+ 
+                     for (int z = 0; z < (int)odims[2]; z++) {
+                         dim_t offz = z * ostrs[2] + offw;
  
-                     for (int y = 0; y < (int)odims[1]; y++) {
-                         dim_t offy = y * ostrs[1] + offz;
+                         for (int y = 0; y < (int)odims[1]; y++) {
+                             dim_t offy = y * ostrs[1] + offz;
  
-                         for (int x = 0; x < (int)odims[0]; x++) {
-                             dim_t id = x + offy;
+                             for (int x = 0; x < (int)odims[0]; x++) {
+                                 dim_t id = x + offy;
  
-                             ptr[id] = *(T *)node->calc(x, y, z, w);
 -                                ptr[id] = *(T *)node->calc(x, y, z, w);
++                                ptr[id] = *(T *)in.node->calc(x, y, z, w);
+                             }
                          }
                      }
                  }
              }
- 
-             Node_ptr prev = node;
-             prev->reset();
-             // FIXME: Replace the current node in any JIT possible trees with the new BufferNode
-             node.reset();
          };
  
 -        getQueue().enqueue(func);
 -
 -        return data;
 -    }
 -
 -    template<typename T>
 -    void Array<T>::eval()
 -    {
 -        if (isReady()) return;
 -
 -        this->setId(getActiveDeviceId());
 -
 -        data = evalNodes<T>(elements(), dims(), strides(), node);
++        getQueue().enqueue(func, *this);
+ 
          ready = true;
-         getQueue().enqueue(func);
+         Node_ptr prev = node;
+         prev->reset();
+         // FIXME: Replace the current node in any JIT possible trees with the new BufferNode
+         node.reset();
      }
  
      template<typename T>
diff --cc src/backend/cpu/reduce.cpp
index 8ce7d0d,a38d061..ffe9185
--- a/src/backend/cpu/reduce.cpp
+++ b/src/backend/cpu/reduce.cpp
@@@ -16,11 -16,22 +16,25 @@@
  #include <functional>
  #include <complex>
  
 +#include <platform.hpp>
 +#include <async_queue.hpp>
 +
  using af::dim4;
  
+ template<>
+ struct Binary<cdouble, af_add_t>
+ {
+     cdouble init()
+     {
+         return cdouble(0,0);
+     }
+ 
+     cdouble operator()(cdouble lhs, cdouble rhs)
+     {
+         return cdouble(real(lhs)+real(rhs), imag(lhs)+imag(rhs));
+     }
+ };
+ 
  namespace cpu
  {
      template<af_op_t op, typename Ti, typename To, int D>

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list