[SCM] blender/upstream: New upstream version 2.78.b+dfsg0
mfv at users.alioth.debian.org
Sat Jun 3 20:27:20 UTC 2017
The following commit has been merged in the upstream branch:
commit 65b83c264576a3d26265ff00c11e24d8eca37d71
Author: Matteo F. Vescovi <mfv at debian.org>
Date: Sat Jun 3 22:09:20 2017 +0200
New upstream version 2.78.b+dfsg0
diff --git a/build_files/cmake/platform/platform_win32_msvc.cmake b/build_files/cmake/platform/platform_win32_msvc.cmake
index 5efda52..ecdba59 100644
--- a/build_files/cmake/platform/platform_win32_msvc.cmake
+++ b/build_files/cmake/platform/platform_win32_msvc.cmake
@@ -236,14 +236,14 @@ if(WITH_CODEC_FFMPEG)
windows_find_package(FFMPEG)
if(NOT FFMPEG_FOUND)
warn_hardcoded_paths(ffmpeg)
- set(FFMPEG_LIBRARY_VERSION 55)
- set(FFMPEG_LIBRARY_VERSION_AVU 52)
+ set(FFMPEG_LIBRARY_VERSION 57)
+ set(FFMPEG_LIBRARY_VERSION_AVU 55)
set(FFMPEG_LIBRARIES
- ${LIBDIR}/ffmpeg/lib/avcodec-${FFMPEG_LIBRARY_VERSION}.lib
- ${LIBDIR}/ffmpeg/lib/avformat-${FFMPEG_LIBRARY_VERSION}.lib
- ${LIBDIR}/ffmpeg/lib/avdevice-${FFMPEG_LIBRARY_VERSION}.lib
- ${LIBDIR}/ffmpeg/lib/avutil-${FFMPEG_LIBRARY_VERSION_AVU}.lib
- ${LIBDIR}/ffmpeg/lib/swscale-2.lib
+ ${LIBDIR}/ffmpeg/lib/avcodec.lib
+ ${LIBDIR}/ffmpeg/lib/avformat.lib
+ ${LIBDIR}/ffmpeg/lib/avdevice.lib
+ ${LIBDIR}/ffmpeg/lib/avutil.lib
+ ${LIBDIR}/ffmpeg/lib/swscale.lib
)
endif()
endif()
@@ -378,6 +378,7 @@ if(WITH_OPENIMAGEIO)
set(OPENCOLORIO_DEFINITIONS "-DOCIO_STATIC_BUILD")
set(OPENIMAGEIO_IDIFF "${OPENIMAGEIO}/bin/idiff.exe")
add_definitions(-DOIIO_STATIC_BUILD)
+ add_definitions(-DOIIO_NO_SSE=1)
endif()
if(WITH_LLVM)
diff --git a/intern/atomic/atomic_ops.h b/intern/atomic/atomic_ops.h
index 0bc7905..1a139d7 100644
--- a/intern/atomic/atomic_ops.h
+++ b/intern/atomic/atomic_ops.h
@@ -77,13 +77,13 @@
/* Function prototypes. */
#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8)
-ATOMIC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x);
-ATOMIC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x);
+ATOMIC_INLINE uint64_t atomic_add_and_fetch_uint64(uint64_t *p, uint64_t x);
+ATOMIC_INLINE uint64_t atomic_sub_and_fetch_uint64(uint64_t *p, uint64_t x);
ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new);
#endif
-ATOMIC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x);
-ATOMIC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x);
+ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x);
+ATOMIC_INLINE uint32_t atomic_sub_and_fetch_uint32(uint32_t *p, uint32_t x);
ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new);
ATOMIC_INLINE uint32_t atomic_fetch_and_add_uint32(uint32_t *p, uint32_t x);
@@ -91,18 +91,18 @@ ATOMIC_INLINE uint32_t atomic_fetch_and_add_uint32(uint32_t *p, uint32_t x);
ATOMIC_INLINE uint8_t atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b);
ATOMIC_INLINE uint8_t atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b);
-ATOMIC_INLINE size_t atomic_add_z(size_t *p, size_t x);
-ATOMIC_INLINE size_t atomic_sub_z(size_t *p, size_t x);
+ATOMIC_INLINE size_t atomic_add_and_fetch_z(size_t *p, size_t x);
+ATOMIC_INLINE size_t atomic_sub_and_fetch_z(size_t *p, size_t x);
ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new);
-ATOMIC_INLINE unsigned atomic_add_u(unsigned *p, unsigned x);
-ATOMIC_INLINE unsigned atomic_sub_u(unsigned *p, unsigned x);
+ATOMIC_INLINE unsigned atomic_add_and_fetch_u(unsigned *p, unsigned x);
+ATOMIC_INLINE unsigned atomic_sub_and_fetch_u(unsigned *p, unsigned x);
ATOMIC_INLINE unsigned atomic_cas_u(unsigned *v, unsigned old, unsigned _new);
/* WARNING! Float 'atomics' are really faked ones, those are actually closer to some kind of spinlock-sync'ed operation,
* which means they are only efficient if collisions are highly unlikely (i.e. if probability of two threads
* working on the same pointer at the same time is very low). */
-ATOMIC_INLINE float atomic_add_fl(float *p, const float x);
+ATOMIC_INLINE float atomic_add_and_fetch_fl(float *p, const float x);
/******************************************************************************/
/* Include system-dependent implementations. */
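(Aside, not part of the patch: the "faked" float atomic warned about above is typically implemented as a 32-bit compare-and-swap loop over the float's bit pattern. A minimal sketch, assuming GCC/Clang __sync builtins; the function name is illustrative and does not appear in the source.)

    #include <stdint.h>

    /* Retry until no other thread modified *p between the read and the swap.
     * Returns the new value, i.e. add-and-fetch semantics. Only efficient
     * when collisions are rare, as the warning above points out. */
    static inline float example_atomic_add_and_fetch_fl(float *p, const float x)
    {
        union { float f; uint32_t u; } oldv, newv;
        do {
            oldv.f = *p;            /* snapshot the current value */
            newv.f = oldv.f + x;    /* compute the desired result */
        } while (!__sync_bool_compare_and_swap((uint32_t *)p, oldv.u, newv.u));
        return newv.f;
    }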
diff --git a/intern/atomic/intern/atomic_ops_ext.h b/intern/atomic/intern/atomic_ops_ext.h
index 4065299..74ed327 100644
--- a/intern/atomic/intern/atomic_ops_ext.h
+++ b/intern/atomic/intern/atomic_ops_ext.h
@@ -56,25 +56,25 @@
/******************************************************************************/
/* size_t operations. */
-ATOMIC_INLINE size_t atomic_add_z(size_t *p, size_t x)
+ATOMIC_INLINE size_t atomic_add_and_fetch_z(size_t *p, size_t x)
{
assert(sizeof(size_t) == LG_SIZEOF_PTR);
#if (LG_SIZEOF_PTR == 8)
- return (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x);
+ return (size_t)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)x);
#elif (LG_SIZEOF_PTR == 4)
- return (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x);
+ return (size_t)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)x);
#endif
}
-ATOMIC_INLINE size_t atomic_sub_z(size_t *p, size_t x)
+ATOMIC_INLINE size_t atomic_sub_and_fetch_z(size_t *p, size_t x)
{
assert(sizeof(size_t) == LG_SIZEOF_PTR);
#if (LG_SIZEOF_PTR == 8)
- return (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)-((int64_t)x));
+ return (size_t)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)-((int64_t)x));
#elif (LG_SIZEOF_PTR == 4)
- return (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)-((int32_t)x));
+ return (size_t)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)-((int32_t)x));
#endif
}
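(Aside, not part of the patch: the subtraction variants above are built on the addition primitives by adding the two's-complement negation of x, which is exact for unsigned types because unsigned arithmetic wraps modulo 2^N. Illustrative sketch:)

    #include <stdint.h>

    /* v + (0 - x) wraps around to the same value as v - x (mod 2^32). */
    static inline uint32_t example_sub_via_add(uint32_t v, uint32_t x)
    {
        uint32_t minus_x = (uint32_t)0 - x;  /* two's-complement negation */
        return v + minus_x;                  /* equals v - x              */
    }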
@@ -91,25 +91,25 @@ ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new)
/******************************************************************************/
/* unsigned operations. */
-ATOMIC_INLINE unsigned atomic_add_u(unsigned *p, unsigned x)
+ATOMIC_INLINE unsigned atomic_add_and_fetch_u(unsigned *p, unsigned x)
{
assert(sizeof(unsigned) == LG_SIZEOF_INT);
#if (LG_SIZEOF_INT == 8)
- return (unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)x);
+ return (unsigned)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)x);
#elif (LG_SIZEOF_INT == 4)
- return (unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)x);
+ return (unsigned)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)x);
#endif
}
-ATOMIC_INLINE unsigned atomic_sub_u(unsigned *p, unsigned x)
+ATOMIC_INLINE unsigned atomic_sub_and_fetch_u(unsigned *p, unsigned x)
{
assert(sizeof(unsigned) == LG_SIZEOF_INT);
#if (LG_SIZEOF_INT == 8)
- return (unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)-((int64_t)x));
+ return (unsigned)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)-((int64_t)x));
#elif (LG_SIZEOF_INT == 4)
- return (unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)-((int32_t)x));
+ return (unsigned)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)-((int32_t)x));
#endif
}
@@ -127,7 +127,7 @@ ATOMIC_INLINE unsigned atomic_cas_u(unsigned *v, unsigned old, unsigned _new)
/******************************************************************************/
/* float operations. */
-ATOMIC_INLINE float atomic_add_fl(float *p, const float x)
+ATOMIC_INLINE float atomic_add_and_fetch_fl(float *p, const float x)
{
assert(sizeof(float) == sizeof(uint32_t));
diff --git a/intern/atomic/intern/atomic_ops_msvc.h b/intern/atomic/intern/atomic_ops_msvc.h
index 15ddda2..c6a4bef 100644
--- a/intern/atomic/intern/atomic_ops_msvc.h
+++ b/intern/atomic/intern/atomic_ops_msvc.h
@@ -43,12 +43,12 @@
/******************************************************************************/
/* 64-bit operations. */
#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8)
-ATOMIC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x)
+ATOMIC_INLINE uint64_t atomic_add_and_fetch_uint64(uint64_t *p, uint64_t x)
{
return InterlockedExchangeAdd64((int64_t *)p, (int64_t)x) + x;
}
-ATOMIC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x)
+ATOMIC_INLINE uint64_t atomic_sub_and_fetch_uint64(uint64_t *p, uint64_t x)
{
return InterlockedExchangeAdd64((int64_t *)p, -((int64_t)x)) - x;
}
@@ -61,12 +61,12 @@ ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _ne
/******************************************************************************/
/* 32-bit operations. */
-ATOMIC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x)
+ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x)
{
return InterlockedExchangeAdd(p, x) + x;
}
-ATOMIC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x)
+ATOMIC_INLINE uint32_t atomic_sub_and_fetch_uint32(uint32_t *p, uint32_t x)
{
return InterlockedExchangeAdd(p, -((int32_t)x)) - x;
}
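(Aside, not part of the patch: InterlockedExchangeAdd and InterlockedExchangeAdd64 return the value the destination held *before* the addition, i.e. they are fetch-and-add primitives. The trailing "+ x" / "- x" in the bodies above is what turns them into the add-and-fetch/sub-and-fetch semantics promised by the new names. A spelled-out sketch:)

    #include <windows.h>
    #include <stdint.h>

    static inline uint32_t example_add_and_fetch_uint32(uint32_t *p, uint32_t x)
    {
        /* InterlockedExchangeAdd returns the value *before* the addition. */
        uint32_t old_value = (uint32_t)InterlockedExchangeAdd((volatile LONG *)p, (LONG)x);
        return old_value + x;  /* value after the addition: add-and-fetch */
    }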
diff --git a/intern/atomic/intern/atomic_ops_unix.h b/intern/atomic/intern/atomic_ops_unix.h
index 55c0002..ad6fe74 100644
--- a/intern/atomic/intern/atomic_ops_unix.h
+++ b/intern/atomic/intern/atomic_ops_unix.h
@@ -58,12 +58,12 @@
/* 64-bit operations. */
#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8)
# if (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8))
-ATOMIC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x)
+ATOMIC_INLINE uint64_t atomic_add_and_fetch_uint64(uint64_t *p, uint64_t x)
{
return __sync_add_and_fetch(p, x);
}
-ATOMIC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x)
+ATOMIC_INLINE uint64_t atomic_sub_and_fetch_uint64(uint64_t *p, uint64_t x)
{
return __sync_sub_and_fetch(p, x);
}
@@ -73,7 +73,7 @@ ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _ne
return __sync_val_compare_and_swap(v, old, _new);
}
# elif (defined(__amd64__) || defined(__x86_64__))
-ATOMIC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x)
+ATOMIC_INLINE uint64_t atomic_add_and_fetch_uint64(uint64_t *p, uint64_t x)
{
asm volatile (
"lock; xaddq %0, %1;"
@@ -83,7 +83,7 @@ ATOMIC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x)
return x;
}
-ATOMIC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x)
+ATOMIC_INLINE uint64_t atomic_sub_and_fetch_uint64(uint64_t *p, uint64_t x)
{
x = (uint64_t)(-(int64_t)x);
asm volatile (
@@ -112,12 +112,12 @@ ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _ne
/******************************************************************************/
/* 32-bit operations. */
#if (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4))
-ATOMIC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x)
+ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x)
{
return __sync_add_and_fetch(p, x);
}
-ATOMIC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x)
+ATOMIC_INLINE uint32_t atomic_sub_and_fetch_uint32(uint32_t *p, uint32_t x)
{
return __sync_sub_and_fetch(p, x);
}
@@ -127,7 +127,7 @@ ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _ne
return __sync_val_compare_and_swap(v, old, _new);
}
#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
-ATOMIC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x)
+ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x)
{
uint32_t ret = x;
asm volatile (
@@ -138,7 +138,7 @@ ATOMIC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x)
return ret+x;
}
-ATOMIC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x)
+ATOMIC_INLINE uint32_t atomic_sub_and_fetch_uint32(uint32_t *p, uint32_t x)
{
ret = (uint32_t)(-(int32_t)x);
asm volatile (
diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt
index 97854a8..79c1c3e 100644
--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@@ -74,7 +74,6 @@ elseif(CMAKE_COMPILER_IS_GNUCC)
if(CXX_HAS_AVX2)
set(CYCLES_AVX2_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mfma -mlzcnt -mbmi -mbmi2 -mf16c -mfpmath=sse")
endif()
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math")
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
check_cxx_compiler_flag(-msse CXX_HAS_SSE)
check_cxx_compiler_flag(-mavx CXX_HAS_AVX)
@@ -90,7 +89,6 @@ elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
if(CXX_HAS_AVX2)
set(CYCLES_AVX2_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mfma -mlzcnt -mbmi -mbmi2 -mf16c")
endif()
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math")
endif()
if(CXX_HAS_SSE)
diff --git a/intern/cycles/app/cycles_standalone.cpp b/intern/cycles/app/cycles_standalone.cpp
index e8168bc..9816d61 100644
--- a/intern/cycles/app/cycles_standalone.cpp
+++ b/intern/cycles/app/cycles_standalone.cpp
@@ -72,20 +72,17 @@ static void session_print(const string& str)
static void session_print_status()
{
- int sample, tile;
- double total_time, sample_time, render_time;
string status, substatus;
/* get status */
- sample = options.session->progress.get_sample();
- options.session->progress.get_tile(tile, total_time, sample_time, render_time);
+ float progress = options.session->progress.get_progress();
options.session->progress.get_status(status, substatus);
if(substatus != "")
status += ": " + substatus;
/* print status */
- status = string_printf("Sample %d %s", sample, status.c_str());
+ status = string_printf("Progress %05.2f %s", (double) progress*100, status.c_str());
session_print(status);
}
@@ -167,13 +164,12 @@ static void display_info(Progress& progress)
latency = (elapsed - last);
last = elapsed;
- int sample, tile;
- double total_time, sample_time, render_time;
+ double total_time, sample_time;
string status, substatus;
- sample = progress.get_sample();
- progress.get_tile(tile, total_time, sample_time, render_time);
+ progress.get_time(total_time, sample_time);
progress.get_status(status, substatus);
+ float progress_val = progress.get_progress();
if(substatus != "")
status += ": " + substatus;
@@ -184,10 +180,10 @@ static void display_info(Progress& progress)
"%s"
" Time: %.2f"
" Latency: %.4f"
- " Sample: %d"
+ " Progress: %05.2f"
" Average: %.4f"
" Interactive: %s",
- status.c_str(), total_time, latency, sample, sample_time, interactive.c_str());
+ status.c_str(), total_time, latency, (double) progress_val*100, sample_time, interactive.c_str());
view_display_info(str.c_str());
@@ -337,7 +333,7 @@ static void options_parse(int argc, const char **argv)
/* device names */
string device_names = "";
- string devicename = "cpu";
+ string devicename = "CPU";
bool list = false;
vector<DeviceType>& types = Device::available_types();
diff --git a/intern/cycles/app/cycles_xml.cpp b/intern/cycles/app/cycles_xml.cpp
index 8a3eb98..35a30ae 100644
--- a/intern/cycles/app/cycles_xml.cpp
+++ b/intern/cycles/app/cycles_xml.cpp
@@ -210,17 +210,6 @@ static void xml_read_camera(XMLReadState& state, pugi::xml_node node)
/* Shader */
-static string xml_socket_name(const char *name)
-{
- string sname = name;
- size_t i;
-
- while((i = sname.find(" ")) != string::npos)
- sname.replace(i, 1, "");
-
- return sname;
-}
-
static void xml_read_shader_graph(XMLReadState& state, Shader *shader, pugi::xml_node graph_node)
{
xml_read_node(state, shader, graph_node);
@@ -255,7 +244,7 @@ static void xml_read_shader_graph(XMLReadState& state, Shader *shader, pugi::xml
ShaderNode *fromnode = (ShaderNode*)graph_reader.node_map[from_node_name];
foreach(ShaderOutput *out, fromnode->outputs)
- if(string_iequals(xml_socket_name(out->name().c_str()), from_socket_name.c_str()))
+ if(string_iequals(out->socket_type.name.string(), from_socket_name.string()))
output = out;
if(!output)
@@ -268,7 +257,7 @@ static void xml_read_shader_graph(XMLReadState& state, Shader *shader, pugi::xml
ShaderNode *tonode = (ShaderNode*)graph_reader.node_map[to_node_name];
foreach(ShaderInput *in, tonode->inputs)
- if(string_iequals(xml_socket_name(in->name().c_str()), to_socket_name.c_str()))
+ if(string_iequals(in->socket_type.name.string(), to_socket_name.string()))
input = in;
if(!input)
@@ -406,7 +395,7 @@ static void xml_read_mesh(const XMLReadState& state, pugi::xml_node node)
int shader = 0;
bool smooth = state.smooth;
- /* read vertices and polygons, RIB style */
+ /* read vertices and polygons */
vector<float3> P;
vector<float> UV;
vector<int> verts, nverts;
@@ -532,8 +521,12 @@ static void xml_read_mesh(const XMLReadState& state, pugi::xml_node node)
sdparams.objecttoworld = state.tfm;
}
- /* temporary for test compatibility */
- mesh->attributes.remove(ATTR_STD_VERTEX_NORMAL);
+ /* we don't yet support arbitrary attributes, for now add vertex
+ * coordinates as generated coordinates if requested */
+ if(mesh->need_attribute(state.scene, ATTR_STD_GENERATED)) {
+ Attribute *attr = mesh->attributes.add(ATTR_STD_GENERATED);
+ memcpy(attr->data_float3(), mesh->verts.data(), sizeof(float3)*mesh->verts.size());
+ }
}
/* Light */
diff --git a/intern/cycles/blender/CCL_api.h b/intern/cycles/blender/CCL_api.h
index d3a68c4..233ffc8 100644
--- a/intern/cycles/blender/CCL_api.h
+++ b/intern/cycles/blender/CCL_api.h
@@ -21,17 +21,6 @@
extern "C" {
#endif
-/* returns a list of devices for selection, array is empty identifier
- * terminated and must not be freed */
-
-typedef struct CCLDeviceInfo {
- char identifier[128];
- char name[512];
- int value;
-} CCLDeviceInfo;
-
-CCLDeviceInfo *CCL_compute_device_list(int device_type);
-
/* create python module _cycles used by addon */
void *CCL_python_module_init(void);
diff --git a/intern/cycles/blender/CMakeLists.txt b/intern/cycles/blender/CMakeLists.txt
index a79deca..b57502b 100644
--- a/intern/cycles/blender/CMakeLists.txt
+++ b/intern/cycles/blender/CMakeLists.txt
@@ -25,6 +25,7 @@ set(SRC
blender_camera.cpp
blender_mesh.cpp
blender_object.cpp
+ blender_object_cull.cpp
blender_particles.cpp
blender_curves.cpp
blender_logging.cpp
@@ -35,6 +36,7 @@ set(SRC
blender_texture.cpp
CCL_api.h
+ blender_object_cull.h
blender_sync.h
blender_session.h
blender_texture.h
diff --git a/intern/cycles/blender/addon/__init__.py b/intern/cycles/blender/addon/__init__.py
index 2938831..1fc3758 100644
--- a/intern/cycles/blender/addon/__init__.py
+++ b/intern/cycles/blender/addon/__init__.py
@@ -28,6 +28,20 @@ bl_info = {
"support": 'OFFICIAL',
"category": "Render"}
+# Support 'reload' case.
+if "bpy" in locals():
+ import importlib
+ if "engine" in locals():
+ importlib.reload(engine)
+ if "version_update" in locals():
+ importlib.reload(version_update)
+ if "ui" in locals():
+ importlib.reload(ui)
+ if "properties" in locals():
+ importlib.reload(properties)
+ if "presets" in locals():
+ importlib.reload(presets)
+
import bpy
from . import (
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
index 977d7f7..802b9b7 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -21,7 +21,8 @@ from bpy.props import (BoolProperty,
EnumProperty,
FloatProperty,
IntProperty,
- PointerProperty)
+ PointerProperty,
+ StringProperty)
# enums
@@ -29,7 +30,7 @@ import _cycles
enum_devices = (
('CPU', "CPU", "Use CPU for rendering"),
- ('GPU', "GPU Compute", "Use GPU compute device for rendering, configured in user preferences"),
+ ('GPU', "GPU Compute", "Use GPU compute device for rendering, configured in the system tab in the user preferences"),
)
if _cycles.with_network:
@@ -122,6 +123,22 @@ enum_volume_interpolation = (
('CUBIC', "Cubic", "Smoothed high quality interpolation, but slower")
)
+enum_device_type = (
+ ('CPU', "CPU", "CPU", 0),
+ ('CUDA', "CUDA", "CUDA", 1),
+ ('OPENCL', "OpenCL", "OpenCL", 2)
+ )
+
+enum_texture_limit = (
+ ('OFF', "No Limit", "No texture size limit", 0),
+ ('128', "128", "Limit texture size to 128 pixels", 1),
+ ('256', "256", "Limit texture size to 256 pixels", 2),
+ ('512', "512", "Limit texture size to 512 pixels", 3),
+ ('1024', "1024", "Limit texture size to 1024 pixels", 4),
+ ('2048', "2048", "Limit texture size to 2048 pixels", 5),
+ ('4096', "4096", "Limit texture size to 4096 pixels", 6),
+ ('8192', "8192", "Limit texture size to 8192 pixels", 7),
+ )
class CyclesRenderSettings(bpy.types.PropertyGroup):
@classmethod
@@ -266,6 +283,13 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
description="Sample all lights (for indirect samples), rather than randomly picking one",
default=True,
)
+ cls.light_sampling_threshold = FloatProperty(
+ name="Light Sampling Threshold",
+ description="Probabilistically terminate light samples when the light contribution is below this threshold (more noise but faster rendering). "
+ "Zero disables the test and never ignores lights",
+ min=0.0, max=1.0,
+ default=0.01,
+ )
cls.caustics_reflective = BoolProperty(
name="Reflective Caustics",
@@ -504,6 +528,12 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
description="Use special type BVH optimized for hair (uses more ram but renders faster)",
default=True,
)
+ cls.debug_bvh_time_steps = IntProperty(
+ name="BVH Time Steps",
+ description="Split BVH primitives by this number of time steps to speed up render time in cost of memory",
+ default=0,
+ min=0, max=16,
+ )
cls.tile_order = EnumProperty(
name="Tile Order",
description="Tile order for rendering",
@@ -552,6 +582,19 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
min=0.0, max=5.0
)
+ cls.use_distance_cull = BoolProperty(
+ name="Use Distance Cull",
+ description="Allow objects to be culled based on the distance from camera",
+ default=False,
+ )
+
+ cls.distance_cull_margin = FloatProperty(
+ name="Cull Distance",
+ description="Cull objects which are further away from camera than this distance",
+ default=50,
+ min=0.0
+ )
+
cls.motion_blur_position = EnumProperty(
name="Motion Blur Position",
default='CENTER',
@@ -581,6 +624,20 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
min=0.0, max=1.0,
)
+ cls.texture_limit = EnumProperty(
+ name="Viewport Texture Limit",
+ default='OFF',
+ description="Limit texture size used by viewport rendering",
+ items=enum_texture_limit
+ )
+
+ cls.texture_limit_render = EnumProperty(
+ name="Render Texture Limit",
+ default='OFF',
+ description="Limit texture size used by final rendering",
+ items=enum_texture_limit
+ )
+
# Various fine-tuning debug flags
def devices_update_callback(self, context):
@@ -1002,6 +1059,12 @@ class CyclesObjectSettings(bpy.types.PropertyGroup):
default=False,
)
+ cls.use_distance_cull = BoolProperty(
+ name="Use Distance Cull",
+ description="Allow this object and its duplicators to be culled by distance from camera",
+ default=False,
+ )
+
cls.use_adaptive_subdivision = BoolProperty(
name="Use Adaptive Subdivision",
description="Use adaptive render time subdivision",
@@ -1123,6 +1186,107 @@ class CyclesCurveSettings(bpy.types.PropertyGroup):
del bpy.types.ParticleSettings.cycles
+class CyclesDeviceSettings(bpy.types.PropertyGroup):
+ @classmethod
+ def register(cls):
+ cls.id = StringProperty(name="ID")
+ cls.name = StringProperty(name="Name")
+ cls.use = BoolProperty(name="Use", default=True)
+ cls.type = EnumProperty(name="Type", items=enum_device_type, default='CUDA')
+
+
+class CyclesPreferences(bpy.types.AddonPreferences):
+ bl_idname = __package__
+
+ def get_device_types(self, context):
+ import _cycles
+ has_cuda, has_opencl = _cycles.get_device_types()
+ list = [('NONE', "None", "Don't use compute device", 0)]
+ if has_cuda:
+ list.append(('CUDA', "CUDA", "Use CUDA for GPU acceleration", 1))
+ if has_opencl:
+ list.append(('OPENCL', "OpenCL", "Use OpenCL for GPU acceleration", 2))
+ return list
+
+ compute_device_type = EnumProperty(
+ name="Compute Device Type",
+ description="Device to use for computation (rendering with Cycles)",
+ items=get_device_types,
+ )
+
+ devices = bpy.props.CollectionProperty(type=CyclesDeviceSettings)
+
+ def get_devices(self):
+ import _cycles
+ # Layout of the device tuples: (Name, Type, Persistent ID)
+ device_list = _cycles.available_devices()
+
+ cuda_devices = []
+ opencl_devices = []
+ for device in device_list:
+ if not device[1] in {'CUDA', 'OPENCL'}:
+ continue
+
+ entry = None
+ # Try to find existing Device entry
+ for dev in self.devices:
+ if dev.id == device[2] and dev.type == device[1]:
+ entry = dev
+ break
+ # Create new entry if no existing one was found
+ if not entry:
+ entry = self.devices.add()
+ entry.id = device[2]
+ entry.name = device[0]
+ entry.type = device[1]
+
+ # Sort entries into lists
+ if entry.type == 'CUDA':
+ cuda_devices.append(entry)
+ elif entry.type == 'OPENCL':
+ opencl_devices.append(entry)
+ return cuda_devices, opencl_devices
+
+
+ def get_num_gpu_devices(self):
+ import _cycles
+ device_list = _cycles.available_devices()
+ num = 0
+ for device in device_list:
+ if device[1] != self.compute_device_type:
+ continue
+ for dev in self.devices:
+ if dev.use and dev.id == device[2]:
+ num += 1
+ return num
+
+
+ def has_active_device(self):
+ return self.get_num_gpu_devices() > 0
+
+
+ def draw_impl(self, layout, context):
+ layout.label(text="Cycles Compute Device:")
+ layout.row().prop(self, "compute_device_type", expand=True)
+
+ cuda_devices, opencl_devices = self.get_devices()
+ row = layout.row()
+
+ if self.compute_device_type == 'CUDA' and cuda_devices:
+ col = row.column(align=True)
+ for device in cuda_devices:
+ col.prop(device, "use", text=device.name, toggle=True)
+
+ if self.compute_device_type == 'OPENCL' and opencl_devices:
+ col = row.column(align=True)
+ for device in opencl_devices:
+ col.prop(device, "use", text=device.name, toggle=True)
+
+
+ def draw(self, context):
+ self.draw_impl(self.layout, context)
+
+
def register():
bpy.utils.register_class(CyclesRenderSettings)
bpy.utils.register_class(CyclesCameraSettings)
@@ -1134,6 +1298,8 @@ def register():
bpy.utils.register_class(CyclesObjectSettings)
bpy.utils.register_class(CyclesCurveRenderSettings)
bpy.utils.register_class(CyclesCurveSettings)
+ bpy.utils.register_class(CyclesDeviceSettings)
+ bpy.utils.register_class(CyclesPreferences)
def unregister():
@@ -1147,3 +1313,5 @@ def unregister():
bpy.utils.unregister_class(CyclesVisibilitySettings)
bpy.utils.unregister_class(CyclesCurveRenderSettings)
bpy.utils.unregister_class(CyclesCurveSettings)
+ bpy.utils.unregister_class(CyclesDeviceSettings)
+ bpy.utils.unregister_class(CyclesPreferences)
diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index 52872d2..ddcefaf 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -53,25 +53,26 @@ class CyclesButtonsPanel:
return rd.engine in cls.COMPAT_ENGINES
+def get_device_type(context):
+ return context.user_preferences.addons[__package__].preferences.compute_device_type
+
+
def use_cpu(context):
cscene = context.scene.cycles
- device_type = context.user_preferences.system.compute_device_type
- return (device_type == 'NONE' or cscene.device == 'CPU')
+ return (get_device_type(context) == 'NONE' or cscene.device == 'CPU')
def use_opencl(context):
cscene = context.scene.cycles
- device_type = context.user_preferences.system.compute_device_type
- return (device_type == 'OPENCL' and cscene.device == 'GPU')
+ return (get_device_type(context) == 'OPENCL' and cscene.device == 'GPU')
def use_cuda(context):
cscene = context.scene.cycles
- device_type = context.user_preferences.system.compute_device_type
- return (device_type == 'CUDA' and cscene.device == 'GPU')
+ return (get_device_type(context) == 'CUDA' and cscene.device == 'GPU')
def use_branched_path(context):
@@ -85,6 +86,14 @@ def use_sample_all_lights(context):
return cscene.sample_all_lights_direct or cscene.sample_all_lights_indirect
+def show_device_selection(context):
+ type = get_device_type(context)
+ if type == 'NETWORK':
+ return True
+ if not type in {'CUDA', 'OPENCL'}:
+ return False
+ return context.user_preferences.addons[__package__].preferences.has_active_device()
+
def draw_samples_info(layout, context):
cscene = context.scene.cycles
@@ -141,7 +150,6 @@ class CyclesRender_PT_sampling(CyclesButtonsPanel, Panel):
scene = context.scene
cscene = scene.cycles
- device_type = context.user_preferences.system.compute_device_type
row = layout.row(align=True)
row.menu("CYCLES_MT_sampling_presets", text=bpy.types.CYCLES_MT_sampling_presets.bl_label)
@@ -150,7 +158,7 @@ class CyclesRender_PT_sampling(CyclesButtonsPanel, Panel):
row = layout.row()
sub = row.row()
- sub.active = device_type != 'OPENCL' or use_cpu(context)
+ sub.active = get_device_type(context) != 'OPENCL' or use_cpu(context)
sub.prop(cscene, "progressive", text="")
row.prop(cscene, "use_square_samples")
@@ -166,6 +174,7 @@ class CyclesRender_PT_sampling(CyclesButtonsPanel, Panel):
sub.prop(cscene, "sample_clamp_direct")
sub.prop(cscene, "sample_clamp_indirect")
+ sub.prop(cscene, "light_sampling_threshold")
if cscene.progressive == 'PATH' or use_branched_path(context) is False:
col = split.column()
@@ -208,7 +217,7 @@ class CyclesRender_PT_sampling(CyclesButtonsPanel, Panel):
draw_samples_info(layout, context)
-class CyclesRender_PT_geometery(CyclesButtonsPanel, Panel):
+class CyclesRender_PT_geometry(CyclesButtonsPanel, Panel):
bl_label = "Geometry"
bl_options = {'DEFAULT_CLOSED'}
@@ -217,6 +226,7 @@ class CyclesRender_PT_geometery(CyclesButtonsPanel, Panel):
scene = context.scene
cscene = scene.cycles
+ ccscene = scene.cycles_curves
if cscene.feature_set == 'EXPERIMENTAL':
split = layout.split()
@@ -243,6 +253,25 @@ class CyclesRender_PT_geometery(CyclesButtonsPanel, Panel):
row.prop(cscene, "volume_step_size")
row.prop(cscene, "volume_max_steps")
+ layout.prop(ccscene, "use_curves", text="Use Hair")
+ col = layout.column()
+ col.active = ccscene.use_curves
+
+ col.prop(ccscene, "primitive", text="Primitive")
+ col.prop(ccscene, "shape", text="Shape")
+
+ if not (ccscene.primitive in {'CURVE_SEGMENTS', 'LINE_SEGMENTS'} and ccscene.shape == 'RIBBONS'):
+ col.prop(ccscene, "cull_backfacing", text="Cull back-faces")
+
+ if ccscene.primitive == 'TRIANGLES' and ccscene.shape == 'THICK':
+ col.prop(ccscene, "resolution", text="Resolution")
+ elif ccscene.primitive == 'CURVE_SEGMENTS':
+ col.prop(ccscene, "subdivisions", text="Curve subdivisions")
+
+ row = col.row()
+ row.prop(ccscene, "minimum_width", text="Min Pixels")
+ row.prop(ccscene, "maximum_width", text="Max Ext.")
+
class CyclesRender_PT_light_paths(CyclesButtonsPanel, Panel):
bl_label = "Light Paths"
@@ -403,6 +432,10 @@ class CyclesRender_PT_performance(CyclesButtonsPanel, Panel):
col.prop(cscene, "debug_use_spatial_splits")
col.prop(cscene, "debug_use_hair_bvh")
+ row = col.row()
+ row.active = not cscene.debug_use_spatial_splits
+ row.prop(cscene, "debug_bvh_time_steps")
+
class CyclesRender_PT_layer_options(CyclesButtonsPanel, Panel):
bl_label = "Layer"
@@ -758,8 +791,13 @@ class CyclesObject_PT_cycles_settings(CyclesButtonsPanel, Panel):
col = layout.column()
col.label(text="Performance:")
row = col.row()
- row.active = scene.render.use_simplify and cscene.use_camera_cull
- row.prop(cob, "use_camera_cull")
+ sub = row.row()
+ sub.active = scene.render.use_simplify and cscene.use_camera_cull
+ sub.prop(cob, "use_camera_cull")
+
+ sub = row.row()
+ sub.active = scene.render.use_simplify and cscene.use_distance_cull
+ sub.prop(cob, "use_distance_cull")
class CYCLES_OT_use_shading_nodes(Operator):
@@ -1380,43 +1418,6 @@ class CyclesParticle_PT_textures(CyclesButtonsPanel, Panel):
layout.template_ID(slot, "texture", new="texture.new")
-class CyclesRender_PT_CurveRendering(CyclesButtonsPanel, Panel):
- bl_label = "Cycles Hair Rendering"
- bl_context = "particle"
-
- @classmethod
- def poll(cls, context):
- psys = context.particle_system
- return CyclesButtonsPanel.poll(context) and psys and psys.settings.type == 'HAIR'
-
- def draw_header(self, context):
- ccscene = context.scene.cycles_curves
- self.layout.prop(ccscene, "use_curves", text="")
-
- def draw(self, context):
- layout = self.layout
-
- scene = context.scene
- ccscene = scene.cycles_curves
-
- layout.active = ccscene.use_curves
-
- layout.prop(ccscene, "primitive", text="Primitive")
- layout.prop(ccscene, "shape", text="Shape")
-
- if not (ccscene.primitive in {'CURVE_SEGMENTS', 'LINE_SEGMENTS'} and ccscene.shape == 'RIBBONS'):
- layout.prop(ccscene, "cull_backfacing", text="Cull back-faces")
-
- if ccscene.primitive == 'TRIANGLES' and ccscene.shape == 'THICK':
- layout.prop(ccscene, "resolution", text="Resolution")
- elif ccscene.primitive == 'CURVE_SEGMENTS':
- layout.prop(ccscene, "subdivisions", text="Curve subdivisions")
-
- row = layout.row()
- row.prop(ccscene, "minimum_width", text="Min Pixels")
- row.prop(ccscene, "maximum_width", text="Max Ext.")
-
-
class CyclesRender_PT_bake(CyclesButtonsPanel, Panel):
bl_label = "Bake"
bl_context = "render"
@@ -1576,24 +1577,40 @@ class CyclesScene_PT_simplify(CyclesButtonsPanel, Panel):
cscene = scene.cycles
layout.active = rd.use_simplify
- split = layout.split()
- col = split.column()
- col.label(text="Viewport:")
- col.prop(rd, "simplify_subdivision", text="Subdivision")
- col.prop(rd, "simplify_child_particles", text="Child Particles")
+ col = layout.column(align=True)
+ col.label(text="Subdivision")
+ row = col.row(align=True)
+ row.prop(rd, "simplify_subdivision", text="Viewport")
+ row.prop(rd, "simplify_subdivision_render", text="Render")
- col = split.column()
- col.label(text="Render:")
- col.prop(rd, "simplify_subdivision_render", text="Subdivision")
- col.prop(rd, "simplify_child_particles_render", text="Child Particles")
+ col = layout.column(align=True)
+ col.label(text="Child Particles")
+ row = col.row(align=True)
+ row.prop(rd, "simplify_child_particles", text="Viewport")
+ row.prop(rd, "simplify_child_particles_render", text="Render")
- col = layout.column()
+ col = layout.column(align=True)
+ split = col.split()
+ sub = split.column()
+ sub.label(text="Texture Limit Viewport")
+ sub.prop(cscene, "texture_limit", text="")
+ sub = split.column()
+ sub.label(text="Texture Limit Render")
+ sub.prop(cscene, "texture_limit_render", text="")
+
+ split = layout.split()
+ col = split.column()
col.prop(cscene, "use_camera_cull")
- subsub = col.column()
- subsub.active = cscene.use_camera_cull
- subsub.prop(cscene, "camera_cull_margin")
+ row = col.row()
+ row.active = cscene.use_camera_cull
+ row.prop(cscene, "camera_cull_margin")
+ col = split.column()
+ col.prop(cscene, "use_distance_cull")
+ row = col.row()
+ row.active = cscene.use_distance_cull
+ row.prop(cscene, "distance_cull_margin", text="Distance")
def draw_device(self, context):
scene = context.scene
@@ -1605,9 +1622,11 @@ def draw_device(self, context):
layout.prop(cscene, "feature_set")
- device_type = context.user_preferences.system.compute_device_type
- if device_type in {'CUDA', 'OPENCL', 'NETWORK'}:
- layout.prop(cscene, "device")
+ split = layout.split(percentage=1/3)
+ split.label("Device:")
+ row = split.row()
+ row.active = show_device_selection(context)
+ row.prop(cscene, "device", text="")
if engine.with_osl() and use_cpu(context):
layout.prop(cscene, "shading_system")
diff --git a/intern/cycles/blender/addon/version_update.py b/intern/cycles/blender/addon/version_update.py
index 830723d..b2a7455 100644
--- a/intern/cycles/blender/addon/version_update.py
+++ b/intern/cycles/blender/addon/version_update.py
@@ -172,6 +172,24 @@ def custom_bake_remap(scene):
@persistent
def do_versions(self):
+ if bpy.context.user_preferences.version <= (2, 78, 1):
+ prop = bpy.context.user_preferences.addons[__package__].preferences
+ system = bpy.context.user_preferences.system
+ if not prop.is_property_set("compute_device_type"):
+ # Device might not currently be available so this can fail
+ try:
+ if system.legacy_compute_device_type == 1:
+ prop.compute_device_type = 'OPENCL'
+ elif system.legacy_compute_device_type == 2:
+ prop.compute_device_type = 'CUDA'
+ else:
+ prop.compute_device_type = 'NONE'
+ except:
+ pass
+
+ # Init device list for UI
+ prop.get_devices()
+
# We don't modify startup file because it assumes to
# have all the default values only.
if not bpy.data.is_saved:
@@ -278,3 +296,9 @@ def do_versions(self):
cscene.pixel_filter_type = cscene.filter_type
if cscene.filter_type == 'BLACKMAN_HARRIS':
cscene.filter_type = 'GAUSSIAN'
+
+ if bpy.data.version <= (2, 78, 2):
+ for scene in bpy.data.scenes:
+ cscene = scene.cycles
+ if not cscene.is_property_set("light_sampling_threshold"):
+ cscene.light_sampling_threshold = 0.0
diff --git a/intern/cycles/blender/blender_curves.cpp b/intern/cycles/blender/blender_curves.cpp
index 378ae67..e42ff5d 100644
--- a/intern/cycles/blender/blender_curves.cpp
+++ b/intern/cycles/blender/blender_curves.cpp
@@ -29,24 +29,6 @@
CCL_NAMESPACE_BEGIN
-/* Utilities */
-
-/* Hair curve functions */
-
-void curveinterp_v3_v3v3v3v3(float3 *p, float3 *v1, float3 *v2, float3 *v3, float3 *v4, const float w[4]);
-void interp_weights(float t, float data[4]);
-float shaperadius(float shape, float root, float tip, float time);
-void InterpolateKeySegments(int seg, int segno, int key, int curve, float3 *keyloc, float *time, ParticleCurveData *CData);
-bool ObtainCacheParticleUV(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background, int uv_num);
-bool ObtainCacheParticleVcol(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background, int vcol_num);
-bool ObtainCacheParticleData(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background);
-void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CData);
-void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
- float3 RotCam, bool is_ortho);
-void ExportCurveTriangleGeometry(Mesh *mesh, ParticleCurveData *CData, int resolution);
-void ExportCurveTriangleUV(ParticleCurveData *CData, int vert_offset, int resol, float3 *uvdata);
-void ExportCurveTriangleVcol(ParticleCurveData *CData, int vert_offset, int resol, uchar4 *cdata);
-
ParticleCurveData::ParticleCurveData()
{
}
@@ -55,7 +37,7 @@ ParticleCurveData::~ParticleCurveData()
{
}
-void interp_weights(float t, float data[4])
+static void interp_weights(float t, float data[4])
{
/* Cardinal curve interpolation */
float t2 = t * t;
@@ -68,17 +50,19 @@ void interp_weights(float t, float data[4])
data[3] = fc * t3 - fc * t2;
}
-void curveinterp_v3_v3v3v3v3(float3 *p, float3 *v1, float3 *v2, float3 *v3, float3 *v4, const float w[4])
+static void curveinterp_v3_v3v3v3v3(float3 *p,
+ float3 *v1, float3 *v2, float3 *v3, float3 *v4,
+ const float w[4])
{
p->x = v1->x * w[0] + v2->x * w[1] + v3->x * w[2] + v4->x * w[3];
p->y = v1->y * w[0] + v2->y * w[1] + v3->y * w[2] + v4->y * w[3];
p->z = v1->z * w[0] + v2->z * w[1] + v3->z * w[2] + v4->z * w[3];
}
-float shaperadius(float shape, float root, float tip, float time)
+static float shaperadius(float shape, float root, float tip, float time)
{
float radius = 1.0f - time;
-
+
if(shape != 0.0f) {
if(shape < 0.0f)
radius = powf(radius, 1.0f + shape);
@@ -90,7 +74,13 @@ float shaperadius(float shape, float root, float tip, float time)
/* curve functions */
-void InterpolateKeySegments(int seg, int segno, int key, int curve, float3 *keyloc, float *time, ParticleCurveData *CData)
+static void InterpolateKeySegments(int seg,
+ int segno,
+ int key,
+ int curve,
+ float3 *keyloc,
+ float *time,
+ ParticleCurveData *CData)
{
float3 ckey_loc1 = CData->curvekey_co[key];
float3 ckey_loc2 = ckey_loc1;
@@ -119,7 +109,11 @@ void InterpolateKeySegments(int seg, int segno, int key, int curve, float3 *keyl
curveinterp_v3_v3v3v3v3(keyloc, &ckey_loc1, &ckey_loc2, &ckey_loc3, &ckey_loc4, t);
}
-bool ObtainCacheParticleData(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background)
+static bool ObtainCacheParticleData(Mesh *mesh,
+ BL::Mesh *b_mesh,
+ BL::Object *b_ob,
+ ParticleCurveData *CData,
+ bool background)
{
int curvenum = 0;
int keyno = 0;
@@ -143,7 +137,7 @@ bool ObtainCacheParticleData(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, Par
int totparts = b_psys.particles.length();
int totchild = background ? b_psys.child_particles.length() : (int)((float)b_psys.child_particles.length() * (float)b_part.draw_percentage() / 100.0f);
int totcurves = totchild;
-
+
if(b_part.child_type() == 0 || totchild == 0)
totcurves += totparts;
@@ -161,7 +155,7 @@ bool ObtainCacheParticleData(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, Par
CData->psys_shader.push_back_slow(shader);
float radius = get_float(cpsys, "radius_scale") * 0.5f;
-
+
CData->psys_rootradius.push_back_slow(radius * get_float(cpsys, "root_width"));
CData->psys_tipradius.push_back_slow(radius * get_float(cpsys, "tip_width"));
CData->psys_shape.push_back_slow(get_float(cpsys, "shape"));
@@ -181,7 +175,7 @@ bool ObtainCacheParticleData(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, Par
for(; pa_no < totparts+totchild; pa_no++) {
int keynum = 0;
CData->curve_firstkey.push_back_slow(keyno);
-
+
float curve_length = 0.0f;
float3 pcKey;
for(int step_no = 0; step_no < ren_step; step_no++) {
@@ -213,7 +207,12 @@ bool ObtainCacheParticleData(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, Par
return true;
}
-bool ObtainCacheParticleUV(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background, int uv_num)
+static bool ObtainCacheParticleUV(Mesh *mesh,
+ BL::Mesh *b_mesh,
+ BL::Object *b_ob,
+ ParticleCurveData *CData,
+ bool background,
+ int uv_num)
{
if(!(mesh && b_mesh && b_ob && CData))
return false;
@@ -231,7 +230,7 @@ bool ObtainCacheParticleUV(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, Parti
int totparts = b_psys.particles.length();
int totchild = background ? b_psys.child_particles.length() : (int)((float)b_psys.child_particles.length() * (float)b_part.draw_percentage() / 100.0f);
int totcurves = totchild;
-
+
if(b_part.child_type() == 0 || totchild == 0)
totcurves += totparts;
@@ -267,7 +266,12 @@ bool ObtainCacheParticleUV(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, Parti
return true;
}
-bool ObtainCacheParticleVcol(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background, int vcol_num)
+static bool ObtainCacheParticleVcol(Mesh *mesh,
+ BL::Mesh *b_mesh,
+ BL::Object *b_ob,
+ ParticleCurveData *CData,
+ bool background,
+ int vcol_num)
{
if(!(mesh && b_mesh && b_ob && CData))
return false;
@@ -285,7 +289,7 @@ bool ObtainCacheParticleVcol(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, Par
int totparts = b_psys.particles.length();
int totchild = background ? b_psys.child_particles.length() : (int)((float)b_psys.child_particles.length() * (float)b_part.draw_percentage() / 100.0f);
int totcurves = totchild;
-
+
if(b_part.child_type() == 0 || totchild == 0)
totcurves += totparts;
@@ -333,16 +337,16 @@ static void set_resolution(BL::Object *b_ob, BL::Scene *scene, bool render)
}
}
-void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
- float3 RotCam, bool is_ortho)
+static void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
+ float3 RotCam, bool is_ortho)
{
int vertexno = mesh->verts.size();
int vertexindex = vertexno;
int numverts = 0, numtris = 0;
/* compute and reserve size of arrays */
- for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
- for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys] ; curve++) {
+ for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
+ for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
continue;
@@ -354,8 +358,8 @@ void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
mesh->reserve_mesh(mesh->verts.size() + numverts, mesh->num_triangles() + numtris);
/* actually export */
- for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
- for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys] ; curve++) {
+ for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
+ for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
continue;
@@ -380,7 +384,7 @@ void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
if(curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1)
v1 = CData->curvekey_co[curvekey] - CData->curvekey_co[max(curvekey - 1, CData->curve_firstkey[curve])];
- else
+ else
v1 = CData->curvekey_co[curvekey + 1] - CData->curvekey_co[curvekey - 1];
time = CData->curvekey_time[curvekey]/CData->curve_length[curve];
@@ -416,15 +420,17 @@ void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
/* texture coords still needed */
}
-void ExportCurveTriangleGeometry(Mesh *mesh, ParticleCurveData *CData, int resolution)
+static void ExportCurveTriangleGeometry(Mesh *mesh,
+ ParticleCurveData *CData,
+ int resolution)
{
int vertexno = mesh->verts.size();
int vertexindex = vertexno;
int numverts = 0, numtris = 0;
/* compute and reserve size of arrays */
- for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
- for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys] ; curve++) {
+ for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
+ for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
continue;
@@ -436,8 +442,8 @@ void ExportCurveTriangleGeometry(Mesh *mesh, ParticleCurveData *CData, int resol
mesh->reserve_mesh(mesh->verts.size() + numverts, mesh->num_triangles() + numtris);
/* actually export */
- for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
- for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys] ; curve++) {
+ for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
+ for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
continue;
@@ -548,7 +554,7 @@ void ExportCurveTriangleGeometry(Mesh *mesh, ParticleCurveData *CData, int resol
/* texture coords still needed */
}
-void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CData)
+static void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CData)
{
int num_keys = 0;
int num_curves = 0;
@@ -557,13 +563,13 @@ void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CData)
return;
Attribute *attr_intercept = NULL;
-
+
if(mesh->need_attribute(scene, ATTR_STD_CURVE_INTERCEPT))
attr_intercept = mesh->curve_attributes.add(ATTR_STD_CURVE_INTERCEPT);
/* compute and reserve size of arrays */
- for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
- for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys] ; curve++) {
+ for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
+ for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
continue;
@@ -582,8 +588,8 @@ void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CData)
num_curves = 0;
/* actually export */
- for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
- for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys] ; curve++) {
+ for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
+ for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
continue;
@@ -677,8 +683,13 @@ static void ExportCurveSegmentsMotion(Mesh *mesh, ParticleCurveData *CData, int
/* in case of new attribute, we verify if there really was any motion */
if(new_attribute) {
if(i != numkeys || !have_motion) {
- /* no motion, remove attributes again */
- VLOG(1) << "No motion, removing attribute";
+ /* No motion or hair "topology" changed, remove attributes again. */
+ if(i != numkeys) {
+ VLOG(1) << "Hair topology changed, removing attribute.";
+ }
+ else {
+ VLOG(1) << "No motion, removing attribute.";
+ }
mesh->curve_attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION);
}
else if(time_index > 0) {
@@ -698,7 +709,10 @@ static void ExportCurveSegmentsMotion(Mesh *mesh, ParticleCurveData *CData, int
}
}
-void ExportCurveTriangleUV(ParticleCurveData *CData, int vert_offset, int resol, float3 *uvdata)
+static void ExportCurveTriangleUV(ParticleCurveData *CData,
+ int vert_offset,
+ int resol,
+ float3 *uvdata)
{
if(uvdata == NULL)
return;
@@ -708,8 +722,8 @@ void ExportCurveTriangleUV(ParticleCurveData *CData, int vert_offset, int resol,
int vertexindex = vert_offset;
- for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
- for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys] ; curve++) {
+ for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
+ for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
continue;
@@ -743,15 +757,18 @@ void ExportCurveTriangleUV(ParticleCurveData *CData, int vert_offset, int resol,
}
}
-void ExportCurveTriangleVcol(ParticleCurveData *CData, int vert_offset, int resol, uchar4 *cdata)
+static void ExportCurveTriangleVcol(ParticleCurveData *CData,
+ int vert_offset,
+ int resol,
+ uchar4 *cdata)
{
if(cdata == NULL)
return;
int vertexindex = vert_offset;
- for(int sys = 0; sys < CData->psys_firstcurve.size() ; sys++) {
- for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys] ; curve++) {
+ for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
+ for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f)
continue;
@@ -1044,4 +1061,3 @@ void BlenderSync::sync_curves(Mesh *mesh,
}
CCL_NAMESPACE_END
-
diff --git a/intern/cycles/blender/blender_mesh.cpp b/intern/cycles/blender/blender_mesh.cpp
index 7c2049d..85117cf 100644
--- a/intern/cycles/blender/blender_mesh.cpp
+++ b/intern/cycles/blender/blender_mesh.cpp
@@ -597,8 +597,8 @@ static void create_mesh(Scene *scene,
Mesh *mesh,
BL::Mesh& b_mesh,
const vector<Shader*>& used_shaders,
- bool subdivision=false,
- bool subdivide_uvs=true)
+ bool subdivision = false,
+ bool subdivide_uvs = true)
{
/* count vertices and faces */
int numverts = b_mesh.vertices.length();
@@ -671,28 +671,10 @@ static void create_mesh(Scene *scene,
int shader = clamp(f->material_index(), 0, used_shaders.size()-1);
bool smooth = f->use_smooth() || use_loop_normals;
- /* split vertices if normal is different
+ /* Create triangles.
*
- * note all vertex attributes must have been set here so we can split
- * and copy attributes in split_vertex without remapping later */
- if(use_loop_normals) {
- BL::Array<float, 12> loop_normals = f->split_normals();
-
- for(int i = 0; i < n; i++) {
- float3 loop_N = make_float3(loop_normals[i * 3], loop_normals[i * 3 + 1], loop_normals[i * 3 + 2]);
-
- if(N[vi[i]] != loop_N) {
- int new_vi = mesh->split_vertex(vi[i]);
-
- /* set new normal and vertex index */
- N = attr_N->data_float3();
- N[new_vi] = loop_N;
- vi[i] = new_vi;
- }
- }
- }
-
- /* create triangles */
+ * NOTE: Autosmooth is already taken care about.
+ */
if(n == 4) {
if(is_zero(cross(mesh->verts[vi[1]] - mesh->verts[vi[0]], mesh->verts[vi[2]] - mesh->verts[vi[0]])) ||
is_zero(cross(mesh->verts[vi[2]] - mesh->verts[vi[0]], mesh->verts[vi[3]] - mesh->verts[vi[0]])))
@@ -724,24 +706,8 @@ static void create_mesh(Scene *scene,
vi.reserve(n);
for(int i = 0; i < n; i++) {
+ /* NOTE: Autosmooth is already taken care about. */
vi[i] = b_mesh.loops[p->loop_start() + i].vertex_index();
-
- /* split vertices if normal is different
- *
- * note all vertex attributes must have been set here so we can split
- * and copy attributes in split_vertex without remapping later */
- if(use_loop_normals) {
- float3 loop_N = get_float3(b_mesh.loops[p->loop_start() + i].normal());
-
- if(N[vi[i]] != loop_N) {
- int new_vi = mesh->split_vertex(vi[i]);
-
- /* set new normal and vertex index */
- N = attr_N->data_float3();
- N[new_vi] = loop_N;
- vi[i] = new_vi;
- }
- }
}
/* create subd faces */
@@ -847,7 +813,7 @@ static void sync_mesh_fluid_motion(BL::Object& b_ob, Scene *scene, Mesh *mesh)
/* Only export previous and next frame, we don't have any in between data. */
float motion_times[2] = {-1.0f, 1.0f};
- for (int step = 0; step < 2; step++) {
+ for(int step = 0; step < 2; step++) {
float relative_time = motion_times[step] * scene->motion_shutter_time() * 0.5f;
float3 *mP = attr_mP->data_float3() + step*mesh->verts.size();
@@ -961,7 +927,20 @@ Mesh *BlenderSync::sync_mesh(BL::Object& b_ob,
mesh->subdivision_type = object_subdivision_type(b_ob, preview, experimental);
- BL::Mesh b_mesh = object_to_mesh(b_data, b_ob, b_scene, true, !preview, need_undeformed, mesh->subdivision_type);
+ /* Disable adaptive subdivision while baking as the baking system
+ * currently doesnt support the topology and will crash.
+ */
+ if(scene->bake_manager->get_baking()) {
+ mesh->subdivision_type = Mesh::SUBDIVISION_NONE;
+ }
+
+ BL::Mesh b_mesh = object_to_mesh(b_data,
+ b_ob,
+ b_scene,
+ true,
+ !preview,
+ need_undeformed,
+ mesh->subdivision_type);
if(b_mesh) {
if(render_layer.use_surfaces && !hide_tris) {
@@ -1086,7 +1065,13 @@ void BlenderSync::sync_mesh_motion(BL::Object& b_ob,
if(ccl::BKE_object_is_deform_modified(b_ob, b_scene, preview)) {
/* get derived mesh */
- b_mesh = object_to_mesh(b_data, b_ob, b_scene, true, !preview, false, false);
+ b_mesh = object_to_mesh(b_data,
+ b_ob,
+ b_scene,
+ true,
+ !preview,
+ false,
+ Mesh::SUBDIVISION_NONE);
}
if(!b_mesh) {
@@ -1157,10 +1142,12 @@ void BlenderSync::sync_mesh_motion(BL::Object& b_ob,
{
/* no motion, remove attributes again */
if(b_mesh.vertices.length() != numverts) {
- VLOG(1) << "Topology differs, disabling motion blur.";
+ VLOG(1) << "Topology differs, disabling motion blur for object "
+ << b_ob.name();
}
else {
- VLOG(1) << "No actual deformation motion for object " << b_ob.name();
+ VLOG(1) << "No actual deformation motion for object "
+ << b_ob.name();
}
mesh->attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION);
if(attr_mN)
diff --git a/intern/cycles/blender/blender_object.cpp b/intern/cycles/blender/blender_object.cpp
index f7f77df..637cf7a 100644
--- a/intern/cycles/blender/blender_object.cpp
+++ b/intern/cycles/blender/blender_object.cpp
@@ -25,6 +25,7 @@
#include "particles.h"
#include "shader.h"
+#include "blender_object_cull.h"
#include "blender_sync.h"
#include "blender_util.h"
@@ -153,6 +154,7 @@ void BlenderSync::sync_light(BL::Object& b_parent,
/* location and (inverted!) direction */
light->co = transform_get_column(&tfm, 3);
light->dir = -transform_get_column(&tfm, 2);
+ light->tfm = tfm;
/* shader */
vector<Shader*> used_shaders;
@@ -234,55 +236,6 @@ void BlenderSync::sync_background_light(bool use_portal)
/* Object */
-/* TODO(sergey): Not really optimal, consider approaches based on k-DOP in order
- * to reduce number of objects which are wrongly considered visible.
- */
-static bool object_boundbox_clip(Scene *scene,
- BL::Object& b_ob,
- Transform& tfm,
- float margin)
-{
- Camera *cam = scene->camera;
- Transform& worldtondc = cam->worldtondc;
- BL::Array<float, 24> boundbox = b_ob.bound_box();
- float3 bb_min = make_float3(FLT_MAX, FLT_MAX, FLT_MAX),
- bb_max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX);
- bool all_behind = true;
- for(int i = 0; i < 8; ++i) {
- float3 p = make_float3(boundbox[3 * i + 0],
- boundbox[3 * i + 1],
- boundbox[3 * i + 2]);
- p = transform_point(&tfm, p);
-
- float4 b = make_float4(p.x, p.y, p.z, 1.0f);
- float4 c = make_float4(dot(worldtondc.x, b),
- dot(worldtondc.y, b),
- dot(worldtondc.z, b),
- dot(worldtondc.w, b));
- p = float4_to_float3(c / c.w);
- if(c.z < 0.0f) {
- p.x = 1.0f - p.x;
- p.y = 1.0f - p.y;
- }
- if(c.z >= -margin) {
- all_behind = false;
- }
- bb_min = min(bb_min, p);
- bb_max = max(bb_max, p);
- }
- if(!all_behind) {
- if(bb_min.x >= 1.0f + margin ||
- bb_min.y >= 1.0f + margin ||
- bb_max.x <= -margin ||
- bb_max.y <= -margin)
- {
- return true;
- }
- return false;
- }
- return true;
-}
-
Object *BlenderSync::sync_object(BL::Object& b_parent,
int persistent_id[OBJECT_PERSISTENT_ID_SIZE],
BL::DupliObject& b_dupli_ob,
@@ -290,8 +243,7 @@ Object *BlenderSync::sync_object(BL::Object& b_parent,
uint layer_flag,
float motion_time,
bool hide_tris,
- bool use_camera_cull,
- float camera_cull_margin,
+ BlenderObjectCulling& culling,
bool *use_portal)
{
BL::Object b_ob = (b_dupli_ob ? b_dupli_ob.object() : b_parent);
@@ -307,11 +259,12 @@ Object *BlenderSync::sync_object(BL::Object& b_parent,
}
/* only interested in object that we can create meshes from */
- if(!object_is_mesh(b_ob))
+ if(!object_is_mesh(b_ob)) {
return NULL;
+ }
- /* Perform camera space culling. */
- if(use_camera_cull && object_boundbox_clip(scene, b_ob, tfm, camera_cull_margin)) {
+ /* Perform object culling. */
+ if(culling.test(scene, b_ob, tfm)) {
return NULL;
}
@@ -547,17 +500,8 @@ void BlenderSync::sync_objects(BL::SpaceView3D& b_v3d, float motion_time)
mesh_motion_synced.clear();
}
- bool allow_camera_cull = false;
- float camera_cull_margin = 0.0f;
- if(b_scene.render().use_simplify()) {
- PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
- allow_camera_cull = scene->camera->type != CAMERA_PANORAMA &&
- !b_scene.render().use_multiview() &&
- get_boolean(cscene, "use_camera_cull");
- if(allow_camera_cull) {
- camera_cull_margin = get_float(cscene, "camera_cull_margin");
- }
- }
+ /* initialize culling */
+ BlenderObjectCulling culling(scene, b_scene);
/* object loop */
BL::Scene::object_bases_iterator b_base;
@@ -589,12 +533,9 @@ void BlenderSync::sync_objects(BL::SpaceView3D& b_v3d, float motion_time)
if(!hide) {
progress.set_sync_status("Synchronizing object", b_ob.name());
- PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles");
- bool use_camera_cull = allow_camera_cull && get_boolean(cobject, "use_camera_cull");
- if(use_camera_cull) {
- /* Need to have proper projection matrix. */
- scene->camera->update();
- }
+ /* load per-object culling data */
+ culling.init_object(scene, b_ob);
+
if(b_ob.is_duplicator() && !object_render_hide_duplis(b_ob)) {
/* dupli objects */
b_ob.dupli_list_create(b_scene, dupli_settings);
@@ -621,8 +562,7 @@ void BlenderSync::sync_objects(BL::SpaceView3D& b_v3d, float motion_time)
ob_layer,
motion_time,
hide_tris,
- use_camera_cull,
- camera_cull_margin,
+ culling,
&use_portal);
/* sync possible particle data, note particle_id
@@ -651,8 +591,7 @@ void BlenderSync::sync_objects(BL::SpaceView3D& b_v3d, float motion_time)
ob_layer,
motion_time,
hide_tris,
- use_camera_cull,
- camera_cull_margin,
+ culling,
&use_portal);
}
}
diff --git a/intern/cycles/blender/blender_object_cull.cpp b/intern/cycles/blender/blender_object_cull.cpp
new file mode 100644
index 0000000..08918dd
--- /dev/null
+++ b/intern/cycles/blender/blender_object_cull.cpp
@@ -0,0 +1,149 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstdlib>
+
+#include "camera.h"
+
+#include "blender_object_cull.h"
+
+CCL_NAMESPACE_BEGIN
+
+BlenderObjectCulling::BlenderObjectCulling(Scene *scene, BL::Scene& b_scene)
+ : use_scene_camera_cull_(false),
+ use_camera_cull_(false),
+ camera_cull_margin_(0.0f),
+ use_scene_distance_cull_(false),
+ use_distance_cull_(false),
+ distance_cull_margin_(0.0f)
+{
+ if(b_scene.render().use_simplify()) {
+ PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
+
+ use_scene_camera_cull_ = scene->camera->type != CAMERA_PANORAMA &&
+ !b_scene.render().use_multiview() &&
+ get_boolean(cscene, "use_camera_cull");
+ use_scene_distance_cull_ = scene->camera->type != CAMERA_PANORAMA &&
+ !b_scene.render().use_multiview() &&
+ get_boolean(cscene, "use_distance_cull");
+
+ camera_cull_margin_ = get_float(cscene, "camera_cull_margin");
+ distance_cull_margin_ = get_float(cscene, "distance_cull_margin");
+
+ if(distance_cull_margin_ == 0.0f) {
+ use_scene_distance_cull_ = false;
+ }
+ }
+}
+
+void BlenderObjectCulling::init_object(Scene *scene, BL::Object& b_ob)
+{
+ if(!use_scene_camera_cull_ && !use_scene_distance_cull_) {
+ return;
+ }
+
+ PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles");
+
+ use_camera_cull_ = use_scene_camera_cull_ && get_boolean(cobject, "use_camera_cull");
+ use_distance_cull_ = use_scene_distance_cull_ && get_boolean(cobject, "use_distance_cull");
+
+ if(use_camera_cull_ || use_distance_cull_) {
+ /* Need to have proper projection matrix. */
+ scene->camera->update();
+ }
+}
+
+bool BlenderObjectCulling::test(Scene *scene, BL::Object& b_ob, Transform& tfm)
+{
+ if(!use_camera_cull_ && !use_distance_cull_) {
+ return false;
+ }
+
+ /* Compute world space bounding box corners. */
+ float3 bb[8];
+ BL::Array<float, 24> boundbox = b_ob.bound_box();
+ for(int i = 0; i < 8; ++i) {
+ float3 p = make_float3(boundbox[3 * i + 0],
+ boundbox[3 * i + 1],
+ boundbox[3 * i + 2]);
+ bb[i] = transform_point(&tfm, p);
+ }
+
+ bool camera_culled = use_camera_cull_ && test_camera(scene, bb);
+ bool distance_culled = use_distance_cull_ && test_distance(scene, bb);
+
+ return ((camera_culled && distance_culled) ||
+ (camera_culled && !use_distance_cull_) ||
+ (distance_culled && !use_camera_cull_));
+}
+
+/* TODO(sergey): Not really optimal, consider approaches based on k-DOP in order
+ * to reduce number of objects which are wrongly considered visible.
+ */
+bool BlenderObjectCulling::test_camera(Scene *scene, float3 bb[8])
+{
+ Camera *cam = scene->camera;
+ Transform& worldtondc = cam->worldtondc;
+ float3 bb_min = make_float3(FLT_MAX, FLT_MAX, FLT_MAX),
+ bb_max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX);
+ bool all_behind = true;
+ for(int i = 0; i < 8; ++i) {
+ float3 p = bb[i];
+ float4 b = make_float4(p.x, p.y, p.z, 1.0f);
+ float4 c = make_float4(dot(worldtondc.x, b),
+ dot(worldtondc.y, b),
+ dot(worldtondc.z, b),
+ dot(worldtondc.w, b));
+ p = float4_to_float3(c / c.w);
+ if(c.z < 0.0f) {
+ p.x = 1.0f - p.x;
+ p.y = 1.0f - p.y;
+ }
+ if(c.z >= -camera_cull_margin_) {
+ all_behind = false;
+ }
+ bb_min = min(bb_min, p);
+ bb_max = max(bb_max, p);
+ }
+ if(all_behind) {
+ return true;
+ }
+ return (bb_min.x >= 1.0f + camera_cull_margin_ ||
+ bb_min.y >= 1.0f + camera_cull_margin_ ||
+ bb_max.x <= -camera_cull_margin_ ||
+ bb_max.y <= -camera_cull_margin_);
+}
+
+bool BlenderObjectCulling::test_distance(Scene *scene, float3 bb[8])
+{
+ float3 camera_position = transform_get_column(&scene->camera->matrix, 3);
+ float3 bb_min = make_float3(FLT_MAX, FLT_MAX, FLT_MAX),
+ bb_max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX);
+
+ /* Find min & max points for x & y & z on bounding box */
+ for(int i = 0; i < 8; ++i) {
+ float3 p = bb[i];
+ bb_min = min(bb_min, p);
+ bb_max = max(bb_max, p);
+ }
+
+ float3 closest_point = max(min(bb_max,camera_position),bb_min);
+ return (len_squared(camera_position - closest_point) >
+ distance_cull_margin_ * distance_cull_margin_);
+}
+
+CCL_NAMESPACE_END
+
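The distance test in the new blender_object_cull.cpp above clamps the camera position to the object's world-space bounding box to find the closest point, then compares its squared distance against the cull margin. A minimal standalone sketch of that idea follows; Vec3, vmin, vmax, len_squared and distance_culled are illustrative stand-ins, not the Cycles float3/BoundBox types.

#include <algorithm>
#include <cfloat>
#include <cstdio>

struct Vec3 { float x, y, z; };

static Vec3 vmin(const Vec3 &a, const Vec3 &b)
{ return {std::min(a.x, b.x), std::min(a.y, b.y), std::min(a.z, b.z)}; }
static Vec3 vmax(const Vec3 &a, const Vec3 &b)
{ return {std::max(a.x, b.x), std::max(a.y, b.y), std::max(a.z, b.z)}; }
static float len_squared(const Vec3 &a)
{ return a.x * a.x + a.y * a.y + a.z * a.z; }

/* True when the whole box lies farther from the camera than the margin,
 * i.e. the object can be skipped. */
static bool distance_culled(const Vec3 *corners, int num_corners,
                            const Vec3 &camera, float margin)
{
	Vec3 bb_min = {FLT_MAX, FLT_MAX, FLT_MAX};
	Vec3 bb_max = {-FLT_MAX, -FLT_MAX, -FLT_MAX};
	for(int i = 0; i < num_corners; ++i) {
		bb_min = vmin(bb_min, corners[i]);
		bb_max = vmax(bb_max, corners[i]);
	}
	/* Clamping the camera position to the box yields the closest point on it. */
	Vec3 closest = vmax(vmin(bb_max, camera), bb_min);
	Vec3 d = {camera.x - closest.x, camera.y - closest.y, camera.z - closest.z};
	return len_squared(d) > margin * margin;
}

int main()
{
	Vec3 corners[8] = {
		{0, 0, 0}, {1, 0, 0}, {0, 1, 0}, {1, 1, 0},
		{0, 0, 1}, {1, 0, 1}, {0, 1, 1}, {1, 1, 1},
	};
	Vec3 camera = {5.0f, 0.5f, 0.5f};
	/* Closest point is (1, 0.5, 0.5), distance 4: culled with margin 3, kept with margin 5. */
	printf("margin 3: %d\n", distance_culled(corners, 8, camera, 3.0f));
	printf("margin 5: %d\n", distance_culled(corners, 8, camera, 5.0f));
	return 0;
}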
diff --git a/intern/cycles/render/background.h b/intern/cycles/blender/blender_object_cull.h
similarity index 50%
copy from intern/cycles/render/background.h
copy to intern/cycles/blender/blender_object_cull.h
index 8029c6a..b6f0ca5 100644
--- a/intern/cycles/render/background.h
+++ b/intern/cycles/blender/blender_object_cull.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2011-2013 Blender Foundation
+ * Copyright 2011-2016 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,47 +14,36 @@
* limitations under the License.
*/
-#ifndef __BACKGROUND_H__
-#define __BACKGROUND_H__
-
-#include "node.h"
+#ifndef __BLENDER_OBJECT_CULL_H__
+#define __BLENDER_OBJECT_CULL_H__
+#include "blender_sync.h"
#include "util_types.h"
CCL_NAMESPACE_BEGIN
-class Device;
-class DeviceScene;
class Scene;
-class Shader;
-class Background : public Node {
+class BlenderObjectCulling
+{
public:
- NODE_DECLARE;
-
- float ao_factor;
- float ao_distance;
-
- bool use_shader;
- bool use_ao;
+ BlenderObjectCulling(Scene *scene, BL::Scene& b_scene);
- uint visibility;
- Shader *shader;
+ void init_object(Scene *scene, BL::Object& b_ob);
+ bool test(Scene *scene, BL::Object& b_ob, Transform& tfm);
- bool transparent;
- bool need_update;
+private:
+ bool test_camera(Scene *scene, float3 bb[8]);
+ bool test_distance(Scene *scene, float3 bb[8]);
- Background();
- ~Background();
-
- void device_update(Device *device, DeviceScene *dscene, Scene *scene);
- void device_free(Device *device, DeviceScene *dscene);
-
- bool modified(const Background& background);
- void tag_update(Scene *scene);
+ bool use_scene_camera_cull_;
+ bool use_camera_cull_;
+ float camera_cull_margin_;
+ bool use_scene_distance_cull_;
+ bool use_distance_cull_;
+ float distance_cull_margin_;
};
CCL_NAMESPACE_END
-#endif /* __BACKGROUND_H__ */
-
+#endif /* __BLENDER_OBJECT_CULL_H__ */
diff --git a/intern/cycles/blender/blender_python.cpp b/intern/cycles/blender/blender_python.cpp
index a50f5ed..438abc4 100644
--- a/intern/cycles/blender/blender_python.cpp
+++ b/intern/cycles/blender/blender_python.cpp
@@ -40,10 +40,6 @@ CCL_NAMESPACE_BEGIN
namespace {
-/* Device list stored static (used by compute_device_list()). */
-static ccl::vector<CCLDeviceInfo> device_list;
-static ccl::DeviceType device_type = DEVICE_NONE;
-
/* Flag describing whether debug flags were synchronized from scene. */
bool debug_flags_set = false;
@@ -195,7 +191,6 @@ static PyObject *exit_func(PyObject * /*self*/, PyObject * /*args*/)
ShaderManager::free_memory();
TaskScheduler::free_memory();
Device::free_memory();
- device_list.free_memory();
Py_RETURN_NONE;
}
@@ -389,7 +384,12 @@ static PyObject *available_devices_func(PyObject * /*self*/, PyObject * /*args*/
for(size_t i = 0; i < devices.size(); i++) {
DeviceInfo& device = devices[i];
- PyTuple_SET_ITEM(ret, i, PyUnicode_FromString(device.description.c_str()));
+ string type_name = Device::string_from_type(device.type);
+ PyObject *device_tuple = PyTuple_New(3);
+ PyTuple_SET_ITEM(device_tuple, 0, PyUnicode_FromString(device.description.c_str()));
+ PyTuple_SET_ITEM(device_tuple, 1, PyUnicode_FromString(type_name.c_str()));
+ PyTuple_SET_ITEM(device_tuple, 2, PyUnicode_FromString(device.id.c_str()));
+ PyTuple_SET_ITEM(ret, i, device_tuple);
}
return ret;
@@ -676,6 +676,20 @@ static PyObject *set_resumable_chunks_func(PyObject * /*self*/, PyObject *args)
Py_RETURN_NONE;
}
+static PyObject *get_device_types_func(PyObject * /*self*/, PyObject * /*args*/)
+{
+ vector<DeviceInfo>& devices = Device::available_devices();
+ bool has_cuda = false, has_opencl = false;
+ for(int i = 0; i < devices.size(); i++) {
+ has_cuda |= (devices[i].type == DEVICE_CUDA);
+ has_opencl |= (devices[i].type == DEVICE_OPENCL);
+ }
+ PyObject *list = PyTuple_New(2);
+ PyTuple_SET_ITEM(list, 0, PyBool_FromLong(has_cuda));
+ PyTuple_SET_ITEM(list, 1, PyBool_FromLong(has_opencl));
+ return list;
+}
+
static PyMethodDef methods[] = {
{"init", init_func, METH_VARARGS, ""},
{"exit", exit_func, METH_VARARGS, ""},
@@ -703,6 +717,9 @@ static PyMethodDef methods[] = {
/* Resumable render */
{"set_resumable_chunks", set_resumable_chunks_func, METH_VARARGS, ""},
+ /* Compute Device selection */
+ {"get_device_types", get_device_types_func, METH_VARARGS, ""},
+
{NULL, NULL, 0, NULL},
};
@@ -715,47 +732,6 @@ static struct PyModuleDef module = {
NULL, NULL, NULL, NULL
};
-static CCLDeviceInfo *compute_device_list(DeviceType type)
-{
- /* create device list if it's not already done */
- if(type != device_type) {
- ccl::vector<DeviceInfo>& devices = ccl::Device::available_devices();
-
- device_type = type;
- device_list.clear();
-
- /* add devices */
- int i = 0;
-
- foreach(DeviceInfo& info, devices) {
- if(info.type == type ||
- (info.type == DEVICE_MULTI && info.multi_devices[0].type == type))
- {
- CCLDeviceInfo cinfo;
-
- strncpy(cinfo.identifier, info.id.c_str(), sizeof(cinfo.identifier));
- cinfo.identifier[info.id.length()] = '\0';
-
- strncpy(cinfo.name, info.description.c_str(), sizeof(cinfo.name));
- cinfo.name[info.description.length()] = '\0';
-
- cinfo.value = i++;
-
- device_list.push_back(cinfo);
- }
- }
-
- /* null terminate */
- if(!device_list.empty()) {
- CCLDeviceInfo cinfo = {"", "", 0};
- device_list.push_back(cinfo);
- }
- }
-
- return (device_list.empty())? NULL: &device_list[0];
-}
-
-
CCL_NAMESPACE_END
void *CCL_python_module_init()
@@ -794,24 +770,3 @@ void *CCL_python_module_init()
return (void*)mod;
}
-
-CCLDeviceInfo *CCL_compute_device_list(int device_type)
-{
- ccl::DeviceType type;
- switch(device_type) {
- case 0:
- type = ccl::DEVICE_CUDA;
- break;
- case 1:
- type = ccl::DEVICE_OPENCL;
- break;
- case 2:
- type = ccl::DEVICE_NETWORK;
- break;
- default:
- type = ccl::DEVICE_NONE;
- break;
- }
- return ccl::compute_device_list(type);
-}
-
diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp
index c250a54..2f30cbd 100644
--- a/intern/cycles/blender/blender_session.cpp
+++ b/intern/cycles/blender/blender_session.cpp
@@ -32,6 +32,7 @@
#include "util_color.h"
#include "util_foreach.h"
#include "util_function.h"
+#include "util_hash.h"
#include "util_logging.h"
#include "util_progress.h"
#include "util_time.h"
@@ -125,8 +126,8 @@ void BlenderSession::create_session()
/* setup callbacks for builtin image support */
scene->image_manager->builtin_image_info_cb = function_bind(&BlenderSession::builtin_image_info, this, _1, _2, _3, _4, _5, _6, _7);
- scene->image_manager->builtin_image_pixels_cb = function_bind(&BlenderSession::builtin_image_pixels, this, _1, _2, _3);
- scene->image_manager->builtin_image_float_pixels_cb = function_bind(&BlenderSession::builtin_image_float_pixels, this, _1, _2, _3);
+ scene->image_manager->builtin_image_pixels_cb = function_bind(&BlenderSession::builtin_image_pixels, this, _1, _2, _3, _4);
+ scene->image_manager->builtin_image_float_pixels_cb = function_bind(&BlenderSession::builtin_image_float_pixels, this, _1, _2, _3, _4);
/* create session */
session = new Session(session_params);
@@ -304,12 +305,16 @@ static PassType get_pass_type(BL::RenderPass& b_pass)
#ifdef WITH_CYCLES_DEBUG
case BL::RenderPass::type_DEBUG:
{
- if(b_pass.debug_type() == BL::RenderPass::debug_type_BVH_TRAVERSAL_STEPS)
- return PASS_BVH_TRAVERSAL_STEPS;
- if(b_pass.debug_type() == BL::RenderPass::debug_type_BVH_TRAVERSED_INSTANCES)
- return PASS_BVH_TRAVERSED_INSTANCES;
- if(b_pass.debug_type() == BL::RenderPass::debug_type_RAY_BOUNCES)
- return PASS_RAY_BOUNCES;
+ switch(b_pass.debug_type()) {
+ case BL::RenderPass::debug_type_BVH_TRAVERSED_NODES:
+ return PASS_BVH_TRAVERSED_NODES;
+ case BL::RenderPass::debug_type_BVH_TRAVERSED_INSTANCES:
+ return PASS_BVH_TRAVERSED_INSTANCES;
+ case BL::RenderPass::debug_type_BVH_INTERSECTIONS:
+ return PASS_BVH_INTERSECTIONS;
+ case BL::RenderPass::debug_type_RAY_BOUNCES:
+ return PASS_RAY_BOUNCES;
+ }
break;
}
#endif
@@ -498,7 +503,8 @@ void BlenderSession::render()
scene->film->tag_update(scene);
scene->integrator->tag_update(scene);
- for(b_rr.views.begin(b_view_iter); b_view_iter != b_rr.views.end(); ++b_view_iter) {
+ int view_index = 0;
+ for(b_rr.views.begin(b_view_iter); b_view_iter != b_rr.views.end(); ++b_view_iter, ++view_index) {
b_rview_name = b_view_iter->name();
/* set the current view */
@@ -514,6 +520,12 @@ void BlenderSession::render()
&python_thread_state,
b_rlay_name.c_str());
+ /* Make sure all views have different noise patterns; the hardcoded constant is arbitrary and only serves to decorrelate the per-view seeds. */
+ if(view_index != 0) {
+ scene->integrator->seed += hash_int_2d(scene->integrator->seed, hash_int(view_index * 0xdeadbeef));
+ scene->integrator->tag_update(scene);
+ }
+
/* Update number of samples per layer. */
int samples = sync->get_layer_samples();
bool bound_samples = sync->get_layer_bound_samples();
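The per-view seed offset added above only needs a deterministic, different integrator seed for every view index so stereo/multi-view renders do not share a noise pattern. A schematic sketch of that pattern follows; hash_u32 and hash_2d are a simple stand-in hash written for illustration, not the actual Cycles hash_int()/hash_int_2d() implementations.

#include <cstdint>
#include <cstdio>

/* Wang-style integer hash, used here only as an illustrative stand-in. */
static uint32_t hash_u32(uint32_t k)
{
	k = (k ^ 61u) ^ (k >> 16);
	k *= 9u;
	k = k ^ (k >> 4);
	k *= 0x27d4eb2du;
	k = k ^ (k >> 15);
	return k;
}

static uint32_t hash_2d(uint32_t a, uint32_t b)
{
	return hash_u32(a ^ hash_u32(b));
}

int main()
{
	const uint32_t base_seed = 42;
	/* View 0 keeps the original seed; every other view derives a new one. */
	for(uint32_t view_index = 0; view_index < 3; ++view_index) {
		uint32_t seed = base_seed;
		if(view_index != 0) {
			seed += hash_2d(base_seed, hash_u32(view_index * 0xdeadbeef));
		}
		printf("view %u -> seed %u\n", (unsigned)view_index, (unsigned)seed);
	}
	return 0;
}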
@@ -572,7 +584,7 @@ static void populate_bake_data(BakeData *data, const
BL::BakePixel bp = pixel_array;
int i;
- for(i=0; i < num_pixels; i++) {
+ for(i = 0; i < num_pixels; i++) {
if(bp.object_id() == object_id) {
data->set(i, bp.primitive_id(), bp.uv(), bp.du_dx(), bp.du_dy(), bp.dv_dx(), bp.dv_dy());
} else {
@@ -922,38 +934,13 @@ void BlenderSession::get_status(string& status, string& substatus)
void BlenderSession::get_progress(float& progress, double& total_time, double& render_time)
{
- double tile_time;
- int tile, sample, samples_per_tile;
- int tile_total = session->tile_manager.state.num_tiles;
- int samples = session->tile_manager.state.sample + 1;
- int total_samples = session->tile_manager.get_num_effective_samples();
-
- session->progress.get_tile(tile, total_time, render_time, tile_time);
-
- sample = session->progress.get_sample();
- samples_per_tile = session->tile_manager.get_num_effective_samples();
-
- if(background && samples_per_tile && tile_total)
- progress = ((float)sample / (float)(tile_total * samples_per_tile));
- else if(!background && samples > 0 && total_samples != INT_MAX)
- progress = ((float)samples) / total_samples;
- else
- progress = 0.0;
+ session->progress.get_time(total_time, render_time);
+ progress = session->progress.get_progress();
}
void BlenderSession::update_bake_progress()
{
- float progress;
- int sample, samples_per_task, parts_total;
-
- sample = session->progress.get_sample();
- samples_per_task = scene->bake_manager->num_samples;
- parts_total = scene->bake_manager->num_parts;
-
- if(samples_per_task)
- progress = ((float)sample / (float)(parts_total * samples_per_task));
- else
- progress = 0.0;
+ float progress = session->progress.get_progress();
if(progress != last_progress) {
b_engine.update_progress(progress);
@@ -1072,7 +1059,13 @@ int BlenderSession::builtin_image_frame(const string &builtin_name)
return atoi(builtin_name.substr(last + 1, builtin_name.size() - last - 1).c_str());
}
-void BlenderSession::builtin_image_info(const string &builtin_name, void *builtin_data, bool &is_float, int &width, int &height, int &depth, int &channels)
+void BlenderSession::builtin_image_info(const string &builtin_name,
+ void *builtin_data,
+ bool &is_float,
+ int &width,
+ int &height,
+ int &depth,
+ int &channels)
{
/* empty image */
is_float = false;
@@ -1150,60 +1143,67 @@ void BlenderSession::builtin_image_info(const string &builtin_name, void *builti
}
}
-bool BlenderSession::builtin_image_pixels(const string &builtin_name, void *builtin_data, unsigned char *pixels)
+bool BlenderSession::builtin_image_pixels(const string &builtin_name,
+ void *builtin_data,
+ unsigned char *pixels,
+ const size_t pixels_size)
{
- if(!builtin_data)
+ if(!builtin_data) {
return false;
+ }
- int frame = builtin_image_frame(builtin_name);
+ const int frame = builtin_image_frame(builtin_name);
PointerRNA ptr;
RNA_id_pointer_create((ID*)builtin_data, &ptr);
BL::Image b_image(ptr);
- int width = b_image.size()[0];
- int height = b_image.size()[1];
- int channels = b_image.channels();
+ const int width = b_image.size()[0];
+ const int height = b_image.size()[1];
+ const int channels = b_image.channels();
- unsigned char *image_pixels;
- image_pixels = image_get_pixels_for_frame(b_image, frame);
- size_t num_pixels = ((size_t)width) * height;
+ unsigned char *image_pixels = image_get_pixels_for_frame(b_image, frame);
+ const size_t num_pixels = ((size_t)width) * height;
- if(image_pixels) {
- memcpy(pixels, image_pixels, num_pixels * channels * sizeof(unsigned char));
+ if(image_pixels && num_pixels * channels == pixels_size) {
+ memcpy(pixels, image_pixels, pixels_size * sizeof(unsigned char));
MEM_freeN(image_pixels);
}
else {
if(channels == 1) {
- memset(pixels, 0, num_pixels * sizeof(unsigned char));
+ memset(pixels, 0, pixels_size * sizeof(unsigned char));
}
else {
+ const size_t num_pixels_safe = pixels_size / channels;
unsigned char *cp = pixels;
- for(size_t i = 0; i < num_pixels; i++, cp += channels) {
+ for(size_t i = 0; i < num_pixels_safe; i++, cp += channels) {
cp[0] = 255;
cp[1] = 0;
cp[2] = 255;
- if(channels == 4)
+ if(channels == 4) {
cp[3] = 255;
+ }
}
}
}
-
- /* premultiply, byte images are always straight for blender */
+ /* Premultiply, byte images are always straight for Blender. */
unsigned char *cp = pixels;
for(size_t i = 0; i < num_pixels; i++, cp += channels) {
cp[0] = (cp[0] * cp[3]) >> 8;
cp[1] = (cp[1] * cp[3]) >> 8;
cp[2] = (cp[2] * cp[3]) >> 8;
}
-
return true;
}
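The reworked byte-pixel path above only memcpy's the Blender buffer when its size matches the destination, and otherwise fills a magenta placeholder bounded by pixels_size instead of width*height. A reduced sketch of that copy-or-fill pattern, on plain buffers with no Blender RNA; copy_or_fill is a hypothetical helper name.

#include <cstddef>
#include <cstring>
#include <vector>

/* Copy the source pixels when the sizes agree, otherwise fill the destination
 * with an opaque magenta placeholder, never writing past pixels_size. */
static void copy_or_fill(const unsigned char *src, size_t src_size,
                         unsigned char *pixels, size_t pixels_size,
                         int channels)
{
	if(src && src_size == pixels_size) {
		memcpy(pixels, src, pixels_size);
		return;
	}
	if(channels == 1) {
		memset(pixels, 0, pixels_size);
		return;
	}
	const size_t num_pixels_safe = pixels_size / channels;
	unsigned char *cp = pixels;
	for(size_t i = 0; i < num_pixels_safe; i++, cp += channels) {
		cp[0] = 255;  /* R */
		cp[1] = 0;    /* G */
		cp[2] = 255;  /* B */
		if(channels == 4) {
			cp[3] = 255;  /* A */
		}
	}
}

int main()
{
	std::vector<unsigned char> dst(16 * 16 * 4);
	/* No source available: the destination is filled, bounded by dst.size(). */
	copy_or_fill(nullptr, 0, dst.data(), dst.size(), 4);
	return 0;
}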
-bool BlenderSession::builtin_image_float_pixels(const string &builtin_name, void *builtin_data, float *pixels)
+bool BlenderSession::builtin_image_float_pixels(const string &builtin_name,
+ void *builtin_data,
+ float *pixels,
+ const size_t pixels_size)
{
- if(!builtin_data)
+ if(!builtin_data) {
return false;
+ }
PointerRNA ptr;
RNA_id_pointer_create((ID*)builtin_data, &ptr);
@@ -1214,16 +1214,16 @@ bool BlenderSession::builtin_image_float_pixels(const string &builtin_name, void
BL::Image b_image(b_id);
int frame = builtin_image_frame(builtin_name);
- int width = b_image.size()[0];
- int height = b_image.size()[1];
- int channels = b_image.channels();
+ const int width = b_image.size()[0];
+ const int height = b_image.size()[1];
+ const int channels = b_image.channels();
float *image_pixels;
image_pixels = image_get_float_pixels_for_frame(b_image, frame);
- size_t num_pixels = ((size_t)width) * height;
+ const size_t num_pixels = ((size_t)width) * height;
- if(image_pixels) {
- memcpy(pixels, image_pixels, num_pixels * channels * sizeof(float));
+ if(image_pixels && num_pixels * channels == pixels_size) {
+ memcpy(pixels, image_pixels, pixels_size * sizeof(float));
MEM_freeN(image_pixels);
}
else {
@@ -1231,13 +1231,15 @@ bool BlenderSession::builtin_image_float_pixels(const string &builtin_name, void
memset(pixels, 0, num_pixels * sizeof(float));
}
else {
+ const size_t num_pixels_safe = pixels_size / channels;
float *fp = pixels;
- for(int i = 0; i < num_pixels; i++, fp += channels) {
+ for(int i = 0; i < num_pixels_safe; i++, fp += channels) {
fp[0] = 1.0f;
fp[1] = 0.0f;
fp[2] = 1.0f;
- if(channels == 4)
+ if(channels == 4) {
fp[3] = 1.0f;
+ }
}
}
}
@@ -1249,8 +1251,9 @@ bool BlenderSession::builtin_image_float_pixels(const string &builtin_name, void
BL::Object b_ob(b_id);
BL::SmokeDomainSettings b_domain = object_smoke_domain_find(b_ob);
- if(!b_domain)
+ if(!b_domain) {
return false;
+ }
int3 resolution = get_int3(b_domain.domain_resolution());
int length, amplify = (b_domain.use_high_resolution())? b_domain.amplify() + 1: 1;
@@ -1262,10 +1265,10 @@ bool BlenderSession::builtin_image_float_pixels(const string &builtin_name, void
amplify = 1;
}
- int width = resolution.x * amplify;
- int height = resolution.y * amplify;
- int depth = resolution.z * amplify;
- size_t num_pixels = ((size_t)width) * height * depth;
+ const int width = resolution.x * amplify;
+ const int height = resolution.y * amplify;
+ const int depth = resolution.z * amplify;
+ const size_t num_pixels = ((size_t)width) * height * depth;
if(builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_DENSITY)) {
SmokeDomainSettings_density_grid_get_length(&b_domain.ptr, &length);
@@ -1349,6 +1352,9 @@ void BlenderSession::update_resumable_tile_manager(int num_samples)
VLOG(1) << "Samples range start is " << range_start_sample << ", "
<< "number of samples to render is " << range_num_samples;
+ scene->integrator->start_sample = range_start_sample;
+ scene->integrator->tag_update(scene);
+
session->tile_manager.range_start_sample = range_start_sample;
session->tile_manager.range_num_samples = range_num_samples;
}
diff --git a/intern/cycles/blender/blender_session.h b/intern/cycles/blender/blender_session.h
index 66a6945..82fe218 100644
--- a/intern/cycles/blender/blender_session.h
+++ b/intern/cycles/blender/blender_session.h
@@ -145,9 +145,21 @@ protected:
void do_write_update_render_tile(RenderTile& rtile, bool do_update_only);
int builtin_image_frame(const string &builtin_name);
- void builtin_image_info(const string &builtin_name, void *builtin_data, bool &is_float, int &width, int &height, int &depth, int &channels);
- bool builtin_image_pixels(const string &builtin_name, void *builtin_data, unsigned char *pixels);
- bool builtin_image_float_pixels(const string &builtin_name, void *builtin_data, float *pixels);
+ void builtin_image_info(const string &builtin_name,
+ void *builtin_data,
+ bool &is_float,
+ int &width,
+ int &height,
+ int &depth,
+ int &channels);
+ bool builtin_image_pixels(const string &builtin_name,
+ void *builtin_data,
+ unsigned char *pixels,
+ const size_t pixels_size);
+ bool builtin_image_float_pixels(const string &builtin_name,
+ void *builtin_data,
+ float *pixels,
+ const size_t pixels_size);
/* Update tile manager to reflect resumable render settings. */
void update_resumable_tile_manager(int num_samples);
diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp
index 4ca202a..f8f2303 100644
--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@@ -255,8 +255,17 @@ void BlenderSync::sync_integrator()
integrator->filter_glossy = get_float(cscene, "blur_glossy");
integrator->seed = get_int(cscene, "seed");
- if(get_boolean(cscene, "use_animated_seed"))
- integrator->seed = hash_int_2d(b_scene.frame_current(), get_int(cscene, "seed"));
+ if(get_boolean(cscene, "use_animated_seed")) {
+ integrator->seed = hash_int_2d(b_scene.frame_current(),
+ get_int(cscene, "seed"));
+ if(b_scene.frame_subframe() != 0.0f) {
+ /* TODO(sergey): Ideally should be some sort of hash_merge,
+ * but this is good enough for now.
+ */
+ integrator->seed += hash_int_2d((int)(b_scene.frame_subframe() * (float)INT_MAX),
+ get_int(cscene, "seed"));
+ }
+ }
integrator->sampling_pattern = (SamplingPattern)get_enum(
cscene,
@@ -284,6 +293,7 @@ void BlenderSync::sync_integrator()
integrator->sample_all_lights_direct = get_boolean(cscene, "sample_all_lights_direct");
integrator->sample_all_lights_indirect = get_boolean(cscene, "sample_all_lights_indirect");
+ integrator->light_sampling_threshold = get_float(cscene, "light_sampling_threshold");
int diffuse_samples = get_int(cscene, "diffuse_samples");
int glossy_samples = get_int(cscene, "glossy_samples");
@@ -488,12 +498,27 @@ SceneParams BlenderSync::get_scene_params(BL::Scene& b_scene,
params.use_bvh_spatial_split = RNA_boolean_get(&cscene, "debug_use_spatial_splits");
params.use_bvh_unaligned_nodes = RNA_boolean_get(&cscene, "debug_use_hair_bvh");
+ params.num_bvh_time_steps = RNA_int_get(&cscene, "debug_bvh_time_steps");
if(background && params.shadingsystem != SHADINGSYSTEM_OSL)
params.persistent_data = r.use_persistent_data();
else
params.persistent_data = false;
+ int texture_limit;
+ if(background) {
+ texture_limit = RNA_enum_get(&cscene, "texture_limit_render");
+ }
+ else {
+ texture_limit = RNA_enum_get(&cscene, "texture_limit");
+ }
+ if(texture_limit > 0 && b_scene.render().use_simplify()) {
+ params.texture_limit = 1 << (texture_limit + 6);
+ }
+ else {
+ params.texture_limit = 0;
+ }
+
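The texture_limit mapping just added turns the Simplify enum level into a pixel size of 1 << (level + 6): level 1 gives 128, level 2 gives 256, up to 8192 at level 7, while level 0 (or Simplify disabled) means no limit. A tiny check of that arithmetic:

#include <cstdio>

int main()
{
	/* Level 0 means "no limit"; levels 1..7 map to 128..8192 pixels. */
	for(int level = 0; level <= 7; level++) {
		int limit = (level > 0) ? (1 << (level + 6)) : 0;
		printf("level %d -> %d px\n", level, limit);
	}
	return 0;
}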
#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
if(is_cpu) {
params.use_qbvh = DebugFlags().cpu.qbvh && system_cpu_support_sse2();
@@ -530,7 +555,12 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine& b_engine,
vector<DeviceInfo>& devices = Device::available_devices();
/* device default CPU */
- params.device = devices[0];
+ foreach(DeviceInfo& device, devices) {
+ if(device.type == DEVICE_CPU) {
+ params.device = device;
+ break;
+ }
+ }
if(get_enum(cscene, "device") == 2) {
/* find network device */
@@ -539,17 +569,39 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine& b_engine,
params.device = info;
}
else if(get_enum(cscene, "device") == 1) {
- /* find GPU device with given id */
- PointerRNA systemptr = b_userpref.system().ptr;
- PropertyRNA *deviceprop = RNA_struct_find_property(&systemptr, "compute_device");
- int device_id = b_userpref.system().compute_device();
+ PointerRNA b_preferences;
- const char *id;
+ BL::UserPreferences::addons_iterator b_addon_iter;
+ for(b_userpref.addons.begin(b_addon_iter); b_addon_iter != b_userpref.addons.end(); ++b_addon_iter) {
+ if(b_addon_iter->module() == "cycles") {
+ b_preferences = b_addon_iter->preferences().ptr;
+ break;
+ }
+ }
- if(RNA_property_enum_identifier(NULL, &systemptr, deviceprop, device_id, &id)) {
- foreach(DeviceInfo& info, devices)
- if(info.id == id)
- params.device = info;
+ int compute_device = get_enum(b_preferences, "compute_device_type");
+
+ if(compute_device != 0) {
+ vector<DeviceInfo> used_devices;
+ RNA_BEGIN(&b_preferences, device, "devices") {
+ if(get_enum(device, "type") == compute_device && get_boolean(device, "use")) {
+ string id = get_string(device, "id");
+ foreach(DeviceInfo& info, devices) {
+ if(info.id == id) {
+ used_devices.push_back(info);
+ break;
+ }
+ }
+ }
+ } RNA_END
+
+ if(used_devices.size() == 1) {
+ params.device = used_devices[0];
+ }
+ else if(used_devices.size() > 1) {
+ params.device = Device::get_multi_device(used_devices);
+ }
+ /* Else keep using the CPU device that was set before. */
}
}
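The new device selection above walks the cycles add-on preferences, gathers the devices the user enabled for the chosen compute type, then uses a single match directly, builds a multi-device from several matches, or keeps the CPU default when nothing matches. A schematic standalone version of that decision; the structs and select_device/make_multi helpers are simplified illustrations, not the Blender/Cycles API.

#include <string>
#include <vector>

enum DeviceType { DEVICE_CPU, DEVICE_CUDA, DEVICE_OPENCL };

struct DeviceInfo {
	DeviceType type;
	std::string id;
	bool use;  /* enabled in the user preferences */
};

/* Stand-in for Device::get_multi_device(): here it just tags the first entry. */
static DeviceInfo make_multi(const std::vector<DeviceInfo> &subdevices)
{
	DeviceInfo info = subdevices[0];
	info.id = "MULTI";
	return info;
}

static DeviceInfo select_device(const std::vector<DeviceInfo> &available,
                                DeviceType compute_type)
{
	/* Default to the CPU device. */
	DeviceInfo selected{DEVICE_CPU, "CPU", true};

	std::vector<DeviceInfo> used;
	for(const DeviceInfo &device : available) {
		if(device.type == compute_type && device.use) {
			used.push_back(device);
		}
	}
	if(used.size() == 1) {
		selected = used[0];
	}
	else if(used.size() > 1) {
		selected = make_multi(used);
	}
	/* Else: keep the CPU default. */
	return selected;
}

int main()
{
	std::vector<DeviceInfo> devices = {
		{DEVICE_CPU, "CPU", true},
		{DEVICE_CUDA, "CUDA_0", true},
		{DEVICE_CUDA, "CUDA_1", false},
	};
	DeviceInfo d = select_device(devices, DEVICE_CUDA);
	(void)d;  /* "CUDA_0": the only enabled CUDA device. */
	return 0;
}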
diff --git a/intern/cycles/blender/blender_sync.h b/intern/cycles/blender/blender_sync.h
index 9a01b4f..6984cbd 100644
--- a/intern/cycles/blender/blender_sync.h
+++ b/intern/cycles/blender/blender_sync.h
@@ -35,6 +35,7 @@
CCL_NAMESPACE_BEGIN
class Background;
+class BlenderObjectCulling;
class Camera;
class Film;
class Light;
@@ -122,8 +123,7 @@ private:
uint layer_flag,
float motion_time,
bool hide_tris,
- bool use_camera_cull,
- float camera_cull_margin,
+ BlenderObjectCulling& culling,
bool *use_portal);
void sync_light(BL::Object& b_parent,
int persistent_id[OBJECT_PERSISTENT_ID_SIZE],
diff --git a/intern/cycles/blender/blender_util.h b/intern/cycles/blender/blender_util.h
index f17a61f..b67834c 100644
--- a/intern/cycles/blender/blender_util.h
+++ b/intern/cycles/blender/blender_util.h
@@ -48,12 +48,12 @@ static inline BL::Mesh object_to_mesh(BL::BlendData& data,
bool apply_modifiers,
bool render,
bool calc_undeformed,
- bool subdivision)
+ Mesh::SubdivisionType subdivision_type)
{
bool subsurf_mod_show_render;
bool subsurf_mod_show_viewport;
- if(subdivision) {
+ if(subdivision_type != Mesh::SUBDIVISION_NONE) {
BL::Modifier subsurf_mod = object.modifiers[object.modifiers.length()-1];
subsurf_mod_show_render = subsurf_mod.show_render();
@@ -65,7 +65,7 @@ static inline BL::Mesh object_to_mesh(BL::BlendData& data,
BL::Mesh me = data.meshes.new_from_object(scene, object, apply_modifiers, (render)? 2: 1, false, calc_undeformed);
- if(subdivision) {
+ if(subdivision_type != Mesh::SUBDIVISION_NONE) {
BL::Modifier subsurf_mod = object.modifiers[object.modifiers.length()-1];
subsurf_mod.show_render(subsurf_mod_show_render);
@@ -74,9 +74,14 @@ static inline BL::Mesh object_to_mesh(BL::BlendData& data,
if((bool)me) {
if(me.use_auto_smooth()) {
- me.calc_normals_split();
+ if(subdivision_type == Mesh::SUBDIVISION_CATMULL_CLARK) {
+ me.calc_normals_split();
+ }
+ else {
+ me.split_faces();
+ }
}
- if(!subdivision) {
+ if(subdivision_type == Mesh::SUBDIVISION_NONE) {
me.calc_tessface(true);
}
}
diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp
index 1570520..874a424 100644
--- a/intern/cycles/bvh/bvh.cpp
+++ b/intern/cycles/bvh/bvh.cpp
@@ -597,7 +597,7 @@ void RegularBVH::pack_nodes(const BVHNode *root)
else {
/* inner node */
int idx[2];
- for (int i = 0; i < 2; ++i) {
+ for(int i = 0; i < 2; ++i) {
if(e.node->get_child(i)->is_leaf()) {
idx[i] = nextLeafNodeIdx++;
}
@@ -835,13 +835,39 @@ void QBVH::pack_aligned_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num)
{
+ BoundBox bounds[4];
+ int child[4];
+ for(int i = 0; i < num; ++i) {
+ bounds[i] = en[i].node->m_bounds;
+ child[i] = en[i].encodeIdx();
+ }
+ pack_aligned_node(e.idx,
+ bounds,
+ child,
+ e.node->m_visibility,
+ e.node->m_time_from,
+ e.node->m_time_to,
+ num);
+}
+
+void QBVH::pack_aligned_node(int idx,
+ const BoundBox *bounds,
+ const int *child,
+ const uint visibility,
+ const float time_from,
+ const float time_to,
+ const int num)
+{
float4 data[BVH_QNODE_SIZE];
memset(data, 0, sizeof(data));
- data[0].x = __uint_as_float(e.node->m_visibility & ~PATH_RAY_NODE_UNALIGNED);
+ data[0].x = __uint_as_float(visibility & ~PATH_RAY_NODE_UNALIGNED);
+ data[0].y = time_from;
+ data[0].z = time_to;
+
for(int i = 0; i < num; i++) {
- float3 bb_min = en[i].node->m_bounds.min;
- float3 bb_max = en[i].node->m_bounds.max;
+ float3 bb_min = bounds[i].min;
+ float3 bb_max = bounds[i].max;
data[1][i] = bb_min.x;
data[2][i] = bb_max.x;
@@ -850,7 +876,7 @@ void QBVH::pack_aligned_inner(const BVHStackEntry& e,
data[5][i] = bb_min.z;
data[6][i] = bb_max.z;
- data[7][i] = __int_as_float(en[i].encodeIdx());
+ data[7][i] = __int_as_float(child[i]);
}
for(int i = num; i < 4; i++) {
@@ -869,22 +895,51 @@ void QBVH::pack_aligned_inner(const BVHStackEntry& e,
data[7][i] = __int_as_float(0);
}
- memcpy(&pack.nodes[e.idx], data, sizeof(float4)*BVH_QNODE_SIZE);
+ memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_QNODE_SIZE);
}
void QBVH::pack_unaligned_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num)
{
+ Transform aligned_space[4];
+ BoundBox bounds[4];
+ int child[4];
+ for(int i = 0; i < num; ++i) {
+ aligned_space[i] = en[i].node->get_aligned_space();
+ bounds[i] = en[i].node->m_bounds;
+ child[i] = en[i].encodeIdx();
+ }
+ pack_unaligned_node(e.idx,
+ aligned_space,
+ bounds,
+ child,
+ e.node->m_visibility,
+ e.node->m_time_from,
+ e.node->m_time_to,
+ num);
+}
+
+void QBVH::pack_unaligned_node(int idx,
+ const Transform *aligned_space,
+ const BoundBox *bounds,
+ const int *child,
+ const uint visibility,
+ const float time_from,
+ const float time_to,
+ const int num)
+{
float4 data[BVH_UNALIGNED_QNODE_SIZE];
memset(data, 0, sizeof(data));
- data[0].x = __uint_as_float(e.node->m_visibility | PATH_RAY_NODE_UNALIGNED);
+ data[0].x = __uint_as_float(visibility | PATH_RAY_NODE_UNALIGNED);
+ data[0].y = time_from;
+ data[0].z = time_to;
for(int i = 0; i < num; i++) {
Transform space = BVHUnaligned::compute_node_transform(
- en[i].node->m_bounds,
- en[i].node->get_aligned_space());
+ bounds[i],
+ aligned_space[i]);
data[1][i] = space.x.x;
data[2][i] = space.x.y;
@@ -902,7 +957,7 @@ void QBVH::pack_unaligned_inner(const BVHStackEntry& e,
data[11][i] = space.y.w;
data[12][i] = space.z.w;
- data[13][i] = __int_as_float(en[i].encodeIdx());
+ data[13][i] = __int_as_float(child[i]);
}
for(int i = num; i < 4; i++) {
@@ -929,7 +984,7 @@ void QBVH::pack_unaligned_inner(const BVHStackEntry& e,
data[13][i] = __int_as_float(0);
}
- memcpy(&pack.nodes[e.idx], data, sizeof(float4)*BVH_UNALIGNED_QNODE_SIZE);
+ memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_UNALIGNED_QNODE_SIZE);
}
/* Quad SIMD Nodes */
@@ -1155,61 +1210,28 @@ void QBVH::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
}
}
- /* TODO(sergey): To be de-duplicated with pack_inner(),
- * but for that need some sort of pack_node(). which operates with
- * direct data, not stack element.
- */
if(is_unaligned) {
- Transform aligned_space = transform_identity();
- float4 inner_data[BVH_UNALIGNED_QNODE_SIZE];
- inner_data[0] = make_float4(
- __int_as_float(visibility | PATH_RAY_NODE_UNALIGNED),
- 0.0f,
- 0.0f,
- 0.0f);
- for(int i = 0; i < 4; ++i) {
- Transform space = BVHUnaligned::compute_node_transform(
- child_bbox[i],
- aligned_space);
- inner_data[1][i] = space.x.x;
- inner_data[2][i] = space.x.y;
- inner_data[3][i] = space.x.z;
-
- inner_data[4][i] = space.y.x;
- inner_data[5][i] = space.y.y;
- inner_data[6][i] = space.y.z;
-
- inner_data[7][i] = space.z.x;
- inner_data[8][i] = space.z.y;
- inner_data[9][i] = space.z.z;
-
- inner_data[10][i] = space.x.w;
- inner_data[11][i] = space.y.w;
- inner_data[12][i] = space.z.w;
-
- inner_data[13][i] = __int_as_float(c[i]);
- }
- memcpy(&pack.nodes[idx], inner_data, sizeof(float4)*BVH_UNALIGNED_QNODE_SIZE);
+ Transform aligned_space[4] = {transform_identity(),
+ transform_identity(),
+ transform_identity(),
+ transform_identity()};
+ pack_unaligned_node(idx,
+ aligned_space,
+ child_bbox,
+ &c[0],
+ visibility,
+ 0.0f,
+ 1.0f,
+ 4);
}
else {
- float4 inner_data[BVH_QNODE_SIZE];
- inner_data[0] = make_float4(
- __int_as_float(visibility & ~PATH_RAY_NODE_UNALIGNED),
- 0.0f,
- 0.0f,
- 0.0f);
- for(int i = 0; i < 4; ++i) {
- float3 bb_min = child_bbox[i].min;
- float3 bb_max = child_bbox[i].max;
- inner_data[1][i] = bb_min.x;
- inner_data[2][i] = bb_max.x;
- inner_data[3][i] = bb_min.y;
- inner_data[4][i] = bb_max.y;
- inner_data[5][i] = bb_min.z;
- inner_data[6][i] = bb_max.z;
- inner_data[7][i] = __int_as_float(c[i]);
- }
- memcpy(&pack.nodes[idx], inner_data, sizeof(float4)*BVH_QNODE_SIZE);
+ pack_aligned_node(idx,
+ child_bbox,
+ &c[0],
+ visibility,
+ 0.0f,
+ 1.0f,
+ 4);
}
}
}
diff --git a/intern/cycles/bvh/bvh.h b/intern/cycles/bvh/bvh.h
index 1675207..35f4d30 100644
--- a/intern/cycles/bvh/bvh.h
+++ b/intern/cycles/bvh/bvh.h
@@ -171,9 +171,25 @@ protected:
void pack_aligned_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num);
+ void pack_aligned_node(int idx,
+ const BoundBox *bounds,
+ const int *child,
+ const uint visibility,
+ const float time_from,
+ const float time_to,
+ const int num);
+
void pack_unaligned_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num);
+ void pack_unaligned_node(int idx,
+ const Transform *aligned_space,
+ const BoundBox *bounds,
+ const int *child,
+ const uint visibility,
+ const float time_from,
+ const float time_to,
+ const int num);
/* refit */
void refit_nodes();
diff --git a/intern/cycles/bvh/bvh_build.cpp b/intern/cycles/bvh/bvh_build.cpp
index 14f66ac..a2f8b33 100644
--- a/intern/cycles/bvh/bvh_build.cpp
+++ b/intern/cycles/bvh/bvh_build.cpp
@@ -26,6 +26,7 @@
#include "scene.h"
#include "curves.h"
+#include "util_algorithm.h"
#include "util_debug.h"
#include "util_foreach.h"
#include "util_logging.h"
@@ -112,81 +113,237 @@ BVHBuild::~BVHBuild()
/* Adding References */
-void BVHBuild::add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i)
+void BVHBuild::add_reference_triangles(BoundBox& root, BoundBox& center, Mesh *mesh, int i)
{
- if(params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
- Attribute *attr_mP = NULL;
-
- if(mesh->has_motion_blur())
- attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
-
- size_t num_triangles = mesh->num_triangles();
- for(uint j = 0; j < num_triangles; j++) {
- Mesh::Triangle t = mesh->get_triangle(j);
+ const Attribute *attr_mP = NULL;
+ if(mesh->has_motion_blur()) {
+ attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ }
+ const size_t num_triangles = mesh->num_triangles();
+ for(uint j = 0; j < num_triangles; j++) {
+ Mesh::Triangle t = mesh->get_triangle(j);
+ const float3 *verts = &mesh->verts[0];
+ if(attr_mP == NULL) {
BoundBox bounds = BoundBox::empty;
- PrimitiveType type = PRIMITIVE_TRIANGLE;
-
- t.bounds_grow(&mesh->verts[0], bounds);
-
- /* motion triangles */
- if(attr_mP) {
- size_t mesh_size = mesh->verts.size();
- size_t steps = mesh->motion_steps - 1;
- float3 *vert_steps = attr_mP->data_float3();
-
- for(size_t i = 0; i < steps; i++)
- t.bounds_grow(vert_steps + i*mesh_size, bounds);
-
- type = PRIMITIVE_MOTION_TRIANGLE;
+ t.bounds_grow(verts, bounds);
+ if(bounds.valid()) {
+ references.push_back(BVHReference(bounds,
+ j,
+ i,
+ PRIMITIVE_TRIANGLE));
+ root.grow(bounds);
+ center.grow(bounds.center2());
+ }
+ }
+ else if(params.num_motion_triangle_steps == 0 || params.use_spatial_split) {
+ /* Motion triangles, simple case: single node for the whole
+ * primitive. Lowest memory footprint and faster BVH build but
+ * least optimal ray-tracing.
+ */
+ /* TODO(sergey): Support motion steps for spatially split BVH. */
+ const size_t num_verts = mesh->verts.size();
+ const size_t num_steps = mesh->motion_steps;
+ const float3 *vert_steps = attr_mP->data_float3();
+ BoundBox bounds = BoundBox::empty;
+ t.bounds_grow(verts, bounds);
+ for(size_t step = 0; step < num_steps - 1; step++) {
+ t.bounds_grow(vert_steps + step*num_verts, bounds);
}
-
if(bounds.valid()) {
- references.push_back(BVHReference(bounds, j, i, type));
+ references.push_back(
+ BVHReference(bounds,
+ j,
+ i,
+ PRIMITIVE_MOTION_TRIANGLE));
root.grow(bounds);
center.grow(bounds.center2());
}
}
+ else {
+ /* Motion triangles, trace optimized case: we split triangle
+ * primitives into separate nodes for each of the time steps.
+ * This way we minimize overlap of neighbor curve primitives.
+ */
+ const int num_bvh_steps = params.num_motion_curve_steps * 2 + 1;
+ const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
+ const size_t num_verts = mesh->verts.size();
+ const size_t num_steps = mesh->motion_steps;
+ const float3 *vert_steps = attr_mP->data_float3();
+ /* Calculate bounding box of the previous time step.
+ * Will be reused later to avoid duplicated work on
+ * calculating BVH time step boundbox.
+ */
+ float3 prev_verts[3];
+ t.motion_verts(verts,
+ vert_steps,
+ num_verts,
+ num_steps,
+ 0.0f,
+ prev_verts);
+ BoundBox prev_bounds = BoundBox::empty;
+ prev_bounds.grow(prev_verts[0]);
+ prev_bounds.grow(prev_verts[1]);
+ prev_bounds.grow(prev_verts[2]);
+ /* Create all primitive time steps. */
+ for(int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
+ const float curr_time = (float)(bvh_step) * num_bvh_steps_inv_1;
+ float3 curr_verts[3];
+ t.motion_verts(verts,
+ vert_steps,
+ num_verts,
+ num_steps,
+ curr_time,
+ curr_verts);
+ BoundBox curr_bounds = BoundBox::empty;
+ curr_bounds.grow(curr_verts[0]);
+ curr_bounds.grow(curr_verts[1]);
+ curr_bounds.grow(curr_verts[2]);
+ BoundBox bounds = prev_bounds;
+ bounds.grow(curr_bounds);
+ if(bounds.valid()) {
+ const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
+ references.push_back(
+ BVHReference(bounds,
+ j,
+ i,
+ PRIMITIVE_MOTION_TRIANGLE,
+ prev_time,
+ curr_time));
+ root.grow(bounds);
+ center.grow(bounds.center2());
+ }
+ /* Current time boundbox becomes previous one for the
+ * next time step.
+ */
+ prev_bounds = curr_bounds;
+ }
+ }
}
+}
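The trace-optimized motion path above subdivides the shutter interval into 2*steps sub-intervals and emits one BVH reference per sub-interval, with bounds covering the triangle at the interval's start and end times (reusing the previous boundary's bounds). A condensed sketch of that loop for a triangle with two motion states and simple linear interpolation; the types and names are illustrative, not the Cycles Mesh/BVHReference API.

#include <algorithm>
#include <cfloat>
#include <cstdio>

struct Vec3 { float x, y, z; };
struct Bounds { Vec3 lo{FLT_MAX, FLT_MAX, FLT_MAX}; Vec3 hi{-FLT_MAX, -FLT_MAX, -FLT_MAX}; };

static void grow(Bounds &b, const Vec3 &p)
{
	b.lo = {std::min(b.lo.x, p.x), std::min(b.lo.y, p.y), std::min(b.lo.z, p.z)};
	b.hi = {std::max(b.hi.x, p.x), std::max(b.hi.y, p.y), std::max(b.hi.z, p.z)};
}

static Vec3 lerp3(const Vec3 &a, const Vec3 &b, float t)
{
	return {a.x + (b.x - a.x) * t, a.y + (b.y - a.y) * t, a.z + (b.z - a.z) * t};
}

int main()
{
	/* Triangle at shutter open and shutter close (two motion states). */
	Vec3 v0[3] = {{0, 0, 0}, {1, 0, 0}, {0, 1, 0}};
	Vec3 v1[3] = {{4, 0, 0}, {5, 0, 0}, {4, 1, 0}};

	const int num_motion_steps = 1;                      /* per-primitive setting */
	const int num_bvh_steps = num_motion_steps * 2 + 1;  /* interval boundaries, here 3 */
	const float inv = 1.0f / (num_bvh_steps - 1);

	/* Bounds at the previous boundary, reused for the next sub-interval. */
	Bounds prev;
	for(int k = 0; k < 3; ++k) grow(prev, lerp3(v0[k], v1[k], 0.0f));

	for(int step = 1; step < num_bvh_steps; ++step) {
		const float curr_time = step * inv;
		Bounds curr;
		for(int k = 0; k < 3; ++k) grow(curr, lerp3(v0[k], v1[k], curr_time));

		/* One reference per sub-interval: union of the two boundary bounds. */
		Bounds ref = prev;
		grow(ref, curr.lo);
		grow(ref, curr.hi);
		const float prev_time = (step - 1) * inv;
		printf("ref [%0.2f, %0.2f]: x in [%0.2f, %0.2f]\n",
		       prev_time, curr_time, ref.lo.x, ref.hi.x);

		prev = curr;
	}
	return 0;
}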
- if(params.primitive_mask & PRIMITIVE_ALL_CURVE) {
- Attribute *curve_attr_mP = NULL;
-
- if(mesh->has_motion_blur())
- curve_attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
-
- size_t num_curves = mesh->num_curves();
- for(uint j = 0; j < num_curves; j++) {
- Mesh::Curve curve = mesh->get_curve(j);
- PrimitiveType type = PRIMITIVE_CURVE;
-
- for(int k = 0; k < curve.num_keys - 1; k++) {
+void BVHBuild::add_reference_curves(BoundBox& root, BoundBox& center, Mesh *mesh, int i)
+{
+ const Attribute *curve_attr_mP = NULL;
+ if(mesh->has_motion_blur()) {
+ curve_attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ }
+ const size_t num_curves = mesh->num_curves();
+ for(uint j = 0; j < num_curves; j++) {
+ const Mesh::Curve curve = mesh->get_curve(j);
+ const float *curve_radius = &mesh->curve_radius[0];
+ for(int k = 0; k < curve.num_keys - 1; k++) {
+ if(curve_attr_mP == NULL) {
+ /* Really simple logic for static hair. */
BoundBox bounds = BoundBox::empty;
- curve.bounds_grow(k, &mesh->curve_keys[0], &mesh->curve_radius[0], bounds);
-
- /* motion curve */
- if(curve_attr_mP) {
- size_t mesh_size = mesh->curve_keys.size();
- size_t steps = mesh->motion_steps - 1;
- float3 *key_steps = curve_attr_mP->data_float3();
-
- for(size_t i = 0; i < steps; i++)
- curve.bounds_grow(k, key_steps + i*mesh_size, &mesh->curve_radius[0], bounds);
-
- type = PRIMITIVE_MOTION_CURVE;
- }
-
+ curve.bounds_grow(k, &mesh->curve_keys[0], curve_radius, bounds);
if(bounds.valid()) {
- int packed_type = PRIMITIVE_PACK_SEGMENT(type, k);
-
+ int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_CURVE, k);
references.push_back(BVHReference(bounds, j, i, packed_type));
root.grow(bounds);
center.grow(bounds.center2());
}
}
+ else if(params.num_motion_curve_steps == 0 || params.use_spatial_split) {
+ /* Simple case of motion curves: single node for the whole
+ * shutter time. Lowest memory usage but less optimal
+ * rendering.
+ */
+ /* TODO(sergey): Support motion steps for spatially split BVH. */
+ BoundBox bounds = BoundBox::empty;
+ curve.bounds_grow(k, &mesh->curve_keys[0], curve_radius, bounds);
+ const size_t num_keys = mesh->curve_keys.size();
+ const size_t num_steps = mesh->motion_steps;
+ const float3 *key_steps = curve_attr_mP->data_float3();
+ for(size_t step = 0; step < num_steps - 1; step++) {
+ curve.bounds_grow(k,
+ key_steps + step*num_keys,
+ curve_radius,
+ bounds);
+ }
+ if(bounds.valid()) {
+ int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_MOTION_CURVE, k);
+ references.push_back(BVHReference(bounds,
+ j,
+ i,
+ packed_type));
+ root.grow(bounds);
+ center.grow(bounds.center2());
+ }
+ }
+ else {
+ /* Motion curves, trace optimized case: we split curve keys
+ * primitives into separate nodes for each of the time steps.
+ * This way we minimize overlap of neighbor curve primitives.
+ */
+ const int num_bvh_steps = params.num_motion_curve_steps * 2 + 1;
+ const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
+ const size_t num_steps = mesh->motion_steps;
+ const float3 *curve_keys = &mesh->curve_keys[0];
+ const float3 *key_steps = curve_attr_mP->data_float3();
+ const size_t num_keys = mesh->curve_keys.size();
+ /* Calculate bounding box of the previous time step.
+ * Will be reused later to avoid duplicated work on
+ * calculating BVH time step boundbox.
+ */
+ float4 prev_keys[4];
+ curve.cardinal_motion_keys(curve_keys,
+ curve_radius,
+ key_steps,
+ num_keys,
+ num_steps,
+ 0.0f,
+ k - 1, k, k + 1, k + 2,
+ prev_keys);
+ BoundBox prev_bounds = BoundBox::empty;
+ curve.bounds_grow(prev_keys, prev_bounds);
+ /* Create all primitive time steps. */
+ for(int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
+ const float curr_time = (float)(bvh_step) * num_bvh_steps_inv_1;
+ float4 curr_keys[4];
+ curve.cardinal_motion_keys(curve_keys,
+ curve_radius,
+ key_steps,
+ num_keys,
+ num_steps,
+ curr_time,
+ k - 1, k, k + 1, k + 2,
+ curr_keys);
+ BoundBox curr_bounds = BoundBox::empty;
+ curve.bounds_grow(curr_keys, curr_bounds);
+ BoundBox bounds = prev_bounds;
+ bounds.grow(curr_bounds);
+ if(bounds.valid()) {
+ const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
+ int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_MOTION_CURVE, k);
+ references.push_back(BVHReference(bounds,
+ j,
+ i,
+ packed_type,
+ prev_time,
+ curr_time));
+ root.grow(bounds);
+ center.grow(bounds.center2());
+ }
+ /* Current time boundbox becomes previous one for the
+ * next time step.
+ */
+ prev_bounds = curr_bounds;
+ }
+ }
}
}
}
+void BVHBuild::add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i)
+{
+ if(params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
+ add_reference_triangles(root, center, mesh, i);
+ }
+ if(params.primitive_mask & PRIMITIVE_ALL_CURVE) {
+ add_reference_curves(root, center, mesh, i);
+ }
+}
+
void BVHBuild::add_reference_object(BoundBox& root, BoundBox& center, Object *ob, int i)
{
references.push_back(BVHReference(ob->bounds, -1, i, 0));
@@ -200,7 +357,7 @@ static size_t count_curve_segments(Mesh *mesh)
for(size_t i = 0; i < num_curves; i++)
num += mesh->get_curve(i).num_keys - 1;
-
+
return num;
}
@@ -344,6 +501,7 @@ BVHNode* BVHBuild::run()
else {
/*rotate(rootnode, 4, 5);*/
rootnode->update_visibility();
+ rootnode->update_time();
}
if(rootnode != NULL) {
VLOG(1) << "BVH build statistics:\n"
@@ -371,7 +529,7 @@ void BVHBuild::progress_update()
{
if(time_dt() - progress_start_time < 0.25)
return;
-
+
double progress_start = (double)progress_count/(double)progress_total;
double duplicates = (double)(progress_total - progress_original_total)/(double)progress_total;
@@ -379,7 +537,7 @@ void BVHBuild::progress_update()
progress_start * 100.0, duplicates * 100.0);
progress.set_substatus(msg);
- progress_start_time = time_dt();
+ progress_start_time = time_dt();
}
void BVHBuild::thread_build_node(InnerNode *inner,
@@ -435,6 +593,7 @@ bool BVHBuild::range_within_max_leaf_size(const BVHRange& range,
return false;
size_t num_triangles = 0;
+ size_t num_motion_triangles = 0;
size_t num_curves = 0;
size_t num_motion_curves = 0;
@@ -445,13 +604,16 @@ bool BVHBuild::range_within_max_leaf_size(const BVHRange& range,
num_curves++;
if(ref.prim_type() & PRIMITIVE_MOTION_CURVE)
num_motion_curves++;
- else if(ref.prim_type() & PRIMITIVE_ALL_TRIANGLE)
+ else if(ref.prim_type() & PRIMITIVE_TRIANGLE)
num_triangles++;
+ else if(ref.prim_type() & PRIMITIVE_MOTION_TRIANGLE)
+ num_motion_triangles++;
}
- return (num_triangles < params.max_triangle_leaf_size) &&
- (num_curves < params.max_curve_leaf_size) &&
- (num_motion_curves < params.max_curve_leaf_size);
+ return (num_triangles <= params.max_triangle_leaf_size) &&
+ (num_motion_triangles <= params.max_motion_triangle_leaf_size) &&
+ (num_curves <= params.max_curve_leaf_size) &&
+ (num_motion_curves <= params.max_motion_curve_leaf_size);
}
/* multithreaded binning builder */
@@ -689,18 +851,24 @@ BVHNode *BVHBuild::create_object_leaf_nodes(const BVHReference *ref, int start,
prim_object[start] = ref->prim_object();
uint visibility = objects[ref->prim_object()]->visibility;
- return new LeafNode(ref->bounds(), visibility, start, start+1);
+ BVHNode *leaf_node = new LeafNode(ref->bounds(), visibility, start, start+1);
+ leaf_node->m_time_from = ref->time_from();
+ leaf_node->m_time_to = ref->time_to();
+ return leaf_node;
}
else {
int mid = num/2;
- BVHNode *leaf0 = create_object_leaf_nodes(ref, start, mid);
- BVHNode *leaf1 = create_object_leaf_nodes(ref+mid, start+mid, num-mid);
+ BVHNode *leaf0 = create_object_leaf_nodes(ref, start, mid);
+ BVHNode *leaf1 = create_object_leaf_nodes(ref+mid, start+mid, num-mid);
BoundBox bounds = BoundBox::empty;
bounds.grow(leaf0->m_bounds);
bounds.grow(leaf1->m_bounds);
- return new InnerNode(bounds, leaf0, leaf1);
+ BVHNode *inner_node = new InnerNode(bounds, leaf0, leaf1);
+ inner_node->m_time_from = min(leaf0->m_time_from, leaf1->m_time_from);
+ inner_node->m_time_to = max(leaf0->m_time_to, leaf1->m_time_to);
+ return inner_node;
}
}
@@ -804,6 +972,16 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
visibility[i],
start_index,
start_index + num);
+ if(true) {
+ float time_from = 1.0f, time_to = 0.0f;
+ for(int j = 0; j < num; ++j) {
+ const BVHReference &ref = p_ref[i][j];
+ time_from = min(time_from, ref.time_from());
+ time_to = max(time_to, ref.time_to());
+ }
+ leaf_node->m_time_from = time_from;
+ leaf_node->m_time_to = time_to;
+ }
if(alignment_found) {
/* Need to recalculate leaf bounds with new alignment. */
leaf_node->m_bounds = BoundBox::empty;
@@ -918,7 +1096,7 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
BVHNode *inner = new InnerNode(inner_bounds, leaves[1], leaves[2]);
return new InnerNode(range.bounds(), leaves[0], inner);
} else {
- /* Shpuld be doing more branches if more primitive types added. */
+ /* Should be doing more branches if more primitive types added. */
assert(num_leaves <= 5);
BoundBox inner_bounds_a = merge(leaves[0]->m_bounds, leaves[1]->m_bounds);
BoundBox inner_bounds_b = merge(leaves[2]->m_bounds, leaves[3]->m_bounds);
@@ -951,7 +1129,7 @@ void BVHBuild::rotate(BVHNode *node, int max_depth)
/* nothing to rotate if we reached a leaf node. */
if(node->is_leaf() || max_depth < 0)
return;
-
+
InnerNode *parent = (InnerNode*)node;
/* rotate all children first */
diff --git a/intern/cycles/bvh/bvh_build.h b/intern/cycles/bvh/bvh_build.h
index 6418034..ee3cde6 100644
--- a/intern/cycles/bvh/bvh_build.h
+++ b/intern/cycles/bvh/bvh_build.h
@@ -63,6 +63,8 @@ protected:
friend class BVHObjectBinning;
/* Adding references. */
+ void add_reference_triangles(BoundBox& root, BoundBox& center, Mesh *mesh, int i);
+ void add_reference_curves(BoundBox& root, BoundBox& center, Mesh *mesh, int i);
void add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i);
void add_reference_object(BoundBox& root, BoundBox& center, Object *ob, int i);
void add_references(BVHRange& root);
diff --git a/intern/cycles/bvh/bvh_node.cpp b/intern/cycles/bvh/bvh_node.cpp
index f5cd699..67580e1 100644
--- a/intern/cycles/bvh/bvh_node.cpp
+++ b/intern/cycles/bvh/bvh_node.cpp
@@ -176,6 +176,19 @@ uint BVHNode::update_visibility()
return m_visibility;
}
+void BVHNode::update_time()
+{
+ if(!is_leaf()) {
+ InnerNode *inner = (InnerNode*)this;
+ BVHNode *child0 = inner->children[0];
+ BVHNode *child1 = inner->children[1];
+ child0->update_time();
+ child1->update_time();
+ m_time_from = min(child0->m_time_from, child1->m_time_from);
+ m_time_to = max(child0->m_time_to, child1->m_time_to);
+ }
+}
+
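The update_time() added above pulls the per-leaf [time_from, time_to] ranges (set when the BVH references were created) up through the inner nodes after the build. A self-contained sketch of that bottom-up min/max propagation on a toy binary tree; Node and update_time here are illustrative, not the Cycles BVHNode classes.

#include <algorithm>
#include <cstdio>

struct Node {
	Node *children[2] = {nullptr, nullptr};
	float time_from = 0.0f, time_to = 1.0f;
	bool is_leaf() const { return children[0] == nullptr; }
};

/* Recursively widen each inner node's time range to cover both children. */
static void update_time(Node *node)
{
	if(node->is_leaf()) {
		return;
	}
	update_time(node->children[0]);
	update_time(node->children[1]);
	node->time_from = std::min(node->children[0]->time_from,
	                           node->children[1]->time_from);
	node->time_to = std::max(node->children[0]->time_to,
	                         node->children[1]->time_to);
}

int main()
{
	Node leaf_a, leaf_b, root;
	leaf_a.time_from = 0.0f;  leaf_a.time_to = 0.5f;
	leaf_b.time_from = 0.25f; leaf_b.time_to = 1.0f;
	root.children[0] = &leaf_a;
	root.children[1] = &leaf_b;
	update_time(&root);
	printf("root covers [%0.2f, %0.2f]\n", root.time_from, root.time_to);
	return 0;
}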
/* Inner Node */
void InnerNode::print(int depth) const
diff --git a/intern/cycles/bvh/bvh_node.h b/intern/cycles/bvh/bvh_node.h
index 2faa40a..090c426 100644
--- a/intern/cycles/bvh/bvh_node.h
+++ b/intern/cycles/bvh/bvh_node.h
@@ -47,7 +47,9 @@ class BVHNode
{
public:
BVHNode() : m_is_unaligned(false),
- m_aligned_space(NULL)
+ m_aligned_space(NULL),
+ m_time_from(0.0f),
+ m_time_to(1.0f)
{
}
@@ -91,12 +93,15 @@ public:
void deleteSubtree();
uint update_visibility();
+ void update_time();
bool m_is_unaligned;
// TODO(sergey): Can be stored as 3x3 matrix, but better to have some
// utilities and type defines in util_transform first.
Transform *m_aligned_space;
+
+ float m_time_from, m_time_to;
};
class InnerNode : public BVHNode
diff --git a/intern/cycles/bvh/bvh_params.h b/intern/cycles/bvh/bvh_params.h
index 2e698a8..65f9da1 100644
--- a/intern/cycles/bvh/bvh_params.h
+++ b/intern/cycles/bvh/bvh_params.h
@@ -43,7 +43,9 @@ public:
/* number of primitives in leaf */
int min_leaf_size;
int max_triangle_leaf_size;
+ int max_motion_triangle_leaf_size;
int max_curve_leaf_size;
+ int max_motion_curve_leaf_size;
/* object or mesh level bvh */
bool top_level;
@@ -59,6 +61,17 @@ public:
*/
bool use_unaligned_nodes;
+ /* Split the time range into this number of steps and create a leaf node
+ * for each of these time steps.
+ *
+ * Speeds up rendering of motion curve primitives at the cost of higher
+ * memory usage.
+ */
+ int num_motion_curve_steps;
+
+ /* Same as above, but for triangle primitives. */
+ int num_motion_triangle_steps;
+
/* fixed parameters */
enum {
MAX_DEPTH = 64,
@@ -80,13 +93,17 @@ public:
min_leaf_size = 1;
max_triangle_leaf_size = 8;
- max_curve_leaf_size = 2;
+ max_motion_triangle_leaf_size = 8;
+ max_curve_leaf_size = 1;
+ max_motion_curve_leaf_size = 4;
top_level = false;
use_qbvh = false;
use_unaligned_nodes = false;
primitive_mask = PRIMITIVE_ALL;
+
+ num_motion_curve_steps = 0;
}
/* SAH costs */
@@ -113,8 +130,15 @@ class BVHReference
public:
__forceinline BVHReference() {}
- __forceinline BVHReference(const BoundBox& bounds_, int prim_index_, int prim_object_, int prim_type)
- : rbounds(bounds_)
+ __forceinline BVHReference(const BoundBox& bounds_,
+ int prim_index_,
+ int prim_object_,
+ int prim_type,
+ float time_from = 0.0f,
+ float time_to = 1.0f)
+ : rbounds(bounds_),
+ time_from_(time_from),
+ time_to_(time_to)
{
rbounds.min.w = __int_as_float(prim_index_);
rbounds.max.w = __int_as_float(prim_object_);
@@ -125,6 +149,9 @@ public:
__forceinline int prim_index() const { return __float_as_int(rbounds.min.w); }
__forceinline int prim_object() const { return __float_as_int(rbounds.max.w); }
__forceinline int prim_type() const { return type; }
+ __forceinline float time_from() const { return time_from_; }
+ __forceinline float time_to() const { return time_to_; }
+
BVHReference& operator=(const BVHReference &arg) {
if(&arg != this) {
@@ -133,9 +160,11 @@ public:
return *this;
}
+
protected:
BoundBox rbounds;
uint type;
+ float time_from_, time_to_;
};
/* BVH Range
diff --git a/intern/cycles/cmake/external_libs.cmake b/intern/cycles/cmake/external_libs.cmake
index 616dd94..403a054 100644
--- a/intern/cycles/cmake/external_libs.cmake
+++ b/intern/cycles/cmake/external_libs.cmake
@@ -44,6 +44,10 @@ if(WITH_CYCLES_CUDA_BINARIES OR NOT WITH_CUDA_DYNLOAD)
else()
message(STATUS "CUDA compiler not found, disabling WITH_CYCLES_CUDA_BINARIES")
set(WITH_CYCLES_CUDA_BINARIES OFF)
+ if(NOT WITH_CUDA_DYNLOAD)
+ message(STATUS "Additionally falling back to dynamic CUDA load")
+ set(WITH_CUDA_DYNLOAD ON)
+ endif()
endif()
endif()
diff --git a/intern/cycles/device/CMakeLists.txt b/intern/cycles/device/CMakeLists.txt
index c34677e..966ff5e 100644
--- a/intern/cycles/device/CMakeLists.txt
+++ b/intern/cycles/device/CMakeLists.txt
@@ -36,6 +36,15 @@ set(SRC
device_task.cpp
)
+set(SRC_OPENCL
+ opencl/opencl.h
+
+ opencl/opencl_base.cpp
+ opencl/opencl_mega.cpp
+ opencl/opencl_split.cpp
+ opencl/opencl_util.cpp
+)
+
if(WITH_CYCLES_NETWORK)
list(APPEND SRC
device_network.cpp
@@ -67,4 +76,4 @@ endif()
include_directories(${INC})
include_directories(SYSTEM ${INC_SYS})
-add_library(cycles_device ${SRC} ${SRC_HEADERS})
+add_library(cycles_device ${SRC} ${SRC_OPENCL} ${SRC_HEADERS})
diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp
index 909ec7a..31c99f4 100644
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -64,6 +64,8 @@ std::ostream& operator <<(std::ostream &os,
<< string_from_bool(requested_features.use_integrator_branched) << std::endl;
os << "Use Patch Evaluation: "
<< string_from_bool(requested_features.use_patch_evaluation) << std::endl;
+ os << "Use Transparent Shadows: "
+ << string_from_bool(requested_features.use_transparent) << std::endl;
return os;
}
@@ -258,33 +260,33 @@ Device *Device::create(DeviceInfo& info, Stats &stats, bool background)
DeviceType Device::type_from_string(const char *name)
{
- if(strcmp(name, "cpu") == 0)
+ if(strcmp(name, "CPU") == 0)
return DEVICE_CPU;
- else if(strcmp(name, "cuda") == 0)
+ else if(strcmp(name, "CUDA") == 0)
return DEVICE_CUDA;
- else if(strcmp(name, "opencl") == 0)
+ else if(strcmp(name, "OPENCL") == 0)
return DEVICE_OPENCL;
- else if(strcmp(name, "network") == 0)
+ else if(strcmp(name, "NETWORK") == 0)
return DEVICE_NETWORK;
- else if(strcmp(name, "multi") == 0)
+ else if(strcmp(name, "MULTI") == 0)
return DEVICE_MULTI;
-
+
return DEVICE_NONE;
}
string Device::string_from_type(DeviceType type)
{
if(type == DEVICE_CPU)
- return "cpu";
+ return "CPU";
else if(type == DEVICE_CUDA)
- return "cuda";
+ return "CUDA";
else if(type == DEVICE_OPENCL)
- return "opencl";
+ return "OPENCL";
else if(type == DEVICE_NETWORK)
- return "network";
+ return "NETWORK";
else if(type == DEVICE_MULTI)
- return "multi";
-
+ return "MULTI";
+
return "";
}
@@ -307,9 +309,6 @@ vector<DeviceType>& Device::available_types()
#ifdef WITH_NETWORK
types.push_back(DEVICE_NETWORK);
#endif
-#ifdef WITH_MULTI
- types.push_back(DEVICE_MULTI);
-#endif
need_types_update = false;
}
@@ -331,10 +330,6 @@ vector<DeviceInfo>& Device::available_devices()
device_opencl_info(devices);
#endif
-#ifdef WITH_MULTI
- device_multi_info(devices);
-#endif
-
device_cpu_info(devices);
#ifdef WITH_NETWORK
@@ -368,6 +363,29 @@ string Device::device_capabilities()
return capabilities;
}
+DeviceInfo Device::get_multi_device(vector<DeviceInfo> subdevices)
+{
+ assert(subdevices.size() > 1);
+
+ DeviceInfo info;
+ info.type = DEVICE_MULTI;
+ info.id = "MULTI";
+ info.description = "Multi Device";
+ info.multi_devices = subdevices;
+ info.num = 0;
+
+ info.has_bindless_textures = true;
+ info.pack_images = false;
+ foreach(DeviceInfo &device, subdevices) {
+ assert(device.type == info.multi_devices[0].type);
+
+ info.pack_images |= device.pack_images;
+ info.has_bindless_textures &= device.has_bindless_textures;
+ }
+
+ return info;
+}
+
void Device::tag_update()
{
need_types_update = true;
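
Since device_multi_info() is removed (see device_intern.h and device_multi.cpp further below), a multi device is now assembled by the caller through the new Device::get_multi_device() helper. A short usage sketch, assuming the caller filters the enumerated devices to one type (here CUDA), since get_multi_device() asserts that all subdevices share a type:

vector<DeviceInfo> subdevices;
foreach(DeviceInfo& info, Device::available_devices()) {
	if(info.type == DEVICE_CUDA)
		subdevices.push_back(info);
}
if(subdevices.size() > 1) {
	DeviceInfo multi_info = Device::get_multi_device(subdevices);
	/* multi_info.type is DEVICE_MULTI; pack_images is OR-ed and
	 * has_bindless_textures AND-ed over the subdevices, as above. */
}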
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index 77dc1fa..ccee25a 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -49,7 +49,7 @@ class DeviceInfo {
public:
DeviceType type;
string description;
- string id;
+ string id; /* used for user preferences, should stay fixed with changing hardware config */
int num;
bool display_device;
bool advanced_shading;
@@ -69,6 +69,12 @@ public:
has_bindless_textures = false;
use_split_kernel = false;
}
+
+ bool operator==(const DeviceInfo &info) {
+ /* Multiple devices with the same ID would break ID-based lookups such as user preferences. */
+ assert(id != info.id || (type == info.type && num == info.num && description == info.description));
+ return id == info.id;
+ }
};
class DeviceRequestedFeatures {
@@ -111,6 +117,9 @@ public:
/* Use OpenSubdiv patch evaluation */
bool use_patch_evaluation;
+
+ /* Use transparent shadows */
+ bool use_transparent;
DeviceRequestedFeatures()
{
@@ -127,6 +136,7 @@ public:
use_volume = false;
use_integrator_branched = false;
use_patch_evaluation = false;
+ use_transparent = false;
}
bool modified(const DeviceRequestedFeatures& requested_features)
@@ -142,7 +152,8 @@ public:
use_subsurface == requested_features.use_subsurface &&
use_volume == requested_features.use_volume &&
use_integrator_branched == requested_features.use_integrator_branched &&
- use_patch_evaluation == requested_features.use_patch_evaluation);
+ use_patch_evaluation == requested_features.use_patch_evaluation &&
+ use_transparent == requested_features.use_transparent);
}
/* Convert the requested features structure to a build options,
@@ -183,6 +194,9 @@ public:
if(!use_patch_evaluation) {
build_options += " -D__NO_PATCH_EVAL__";
}
+ if(!use_transparent) {
+ build_options += " -D__NO_TRANSPARENT__";
+ }
return build_options;
}
};
@@ -214,6 +228,7 @@ public:
DeviceInfo info;
virtual const string& error_message() { return error_msg; }
bool have_error() { return !error_message().empty(); }
+ virtual bool show_samples() const { return false; }
/* statistics */
Stats &stats;
@@ -282,6 +297,7 @@ public:
static vector<DeviceType>& available_types();
static vector<DeviceInfo>& available_devices();
static string device_capabilities();
+ static DeviceInfo get_multi_device(vector<DeviceInfo> subdevices);
/* Tag devices lists for update. */
static void tag_update();
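
To see where the new use_transparent flag ends up, a small sketch assuming the conversion helper declared below the "Convert the requested features structure to a build options" comment is named get_build_options(), as in the surrounding file; the flag values here are illustrative:

DeviceRequestedFeatures features;
/* The use_* flags default to false in the constructor shown above;
 * here two of them are left disabled for the example. */
features.use_transparent = false;      /* scene uses no transparent shadows */
features.use_patch_evaluation = false; /* no OpenSubdiv patches */
string build_options = features.get_build_options();
/* build_options now contains -D__NO_PATCH_EVAL__ and -D__NO_TRANSPARENT__
 * (among the other -D__NO_*__ defines for disabled features), so kernels
 * can be compiled without those code paths. */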
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index aed86d8..c8e001e 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -112,6 +112,11 @@ public:
task_pool.stop();
}
+ virtual bool show_samples() const
+ {
+ return (TaskScheduler::num_threads() == 1);
+ }
+
void mem_alloc(device_memory& mem, MemoryType /*type*/)
{
mem.device_pointer = mem.data_pointer;
@@ -275,7 +280,7 @@ public:
tile.sample = sample + 1;
- task.update_progress(&tile);
+ task.update_progress(&tile, tile.w*tile.h);
}
task.release_tile(tile);
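
show_samples() is new in this patch. A sketch of assumed caller-side usage (the session/status code is not part of this diff and the variable names are illustrative): a per-sample counter is only meaningful when the device renders one tile at a time, which for the CPU device means a single thread.

string status;
if(device->show_samples()) {
	status = string_printf("Sample %d/%d", current_sample + 1, num_samples);
}
else {
	/* Many tiles progress in parallel, so report tiles instead of samples. */
	status = string_printf("Rendered %d/%d tiles", tiles_done, tiles_total);
}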
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index a968a81..233f94b 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include <climits>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -114,6 +115,12 @@ public:
return path_exists(cubins_path);
}
+ virtual bool show_samples() const
+ {
+ /* The CUDADevice only processes one tile at a time, so showing samples is fine. */
+ return true;
+ }
+
/*#ifdef NDEBUG
#define cuda_abort()
#else
@@ -213,7 +220,8 @@ public:
return;
int major, minor;
- cuDeviceComputeCapability(&major, &minor, cuDevId);
+ cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
+ cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
cuDevArchitecture = major*100 + minor*10;
cuda_pop_context();
@@ -233,7 +241,8 @@ public:
bool support_device(const DeviceRequestedFeatures& /*requested_features*/)
{
int major, minor;
- cuDeviceComputeCapability(&major, &minor, cuDevId);
+ cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
+ cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
/* We only support sm_20 and above */
if(major < 2) {
@@ -315,7 +324,8 @@ public:
{
/* Compute cubin name. */
int major, minor;
- cuDeviceComputeCapability(&major, &minor, cuDevId);
+ cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
+ cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
/* Attempt to use kernel provided with Blender. */
if(!use_adaptive_compilation()) {
@@ -343,7 +353,7 @@ public:
const string cubin_file = string_printf("cycles_kernel_sm%d%d_%s.cubin",
major, minor,
cubin_md5.c_str());
- const string cubin = path_user_get(path_join("cache", cubin_file));
+ const string cubin = path_cache_get(path_join("kernels", cubin_file));
VLOG(1) << "Testing for locally compiled kernel " << cubin << ".";
if(path_exists(cubin)) {
VLOG(1) << "Using locally compiled kernel.";
@@ -1263,7 +1273,7 @@ public:
tile.sample = sample + 1;
- task->update_progress(&tile);
+ task->update_progress(&tile, tile.w*tile.h);
}
task->release_tile(tile);
@@ -1394,8 +1404,8 @@ void device_cuda_info(vector<DeviceInfo>& devices)
if(cuDeviceGetName(name, 256, num) != CUDA_SUCCESS)
continue;
- int major, minor;
- cuDeviceComputeCapability(&major, &minor, num);
+ int major;
+ cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, num);
if(major < 2) {
continue;
}
@@ -1404,13 +1414,22 @@ void device_cuda_info(vector<DeviceInfo>& devices)
info.type = DEVICE_CUDA;
info.description = string(name);
- info.id = string_printf("CUDA_%d", num);
info.num = num;
info.advanced_shading = (major >= 2);
info.has_bindless_textures = (major >= 3);
info.pack_images = false;
+ int pci_location[3] = {0, 0, 0};
+ cuDeviceGetAttribute(&pci_location[0], CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, num);
+ cuDeviceGetAttribute(&pci_location[1], CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, num);
+ cuDeviceGetAttribute(&pci_location[2], CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, num);
+ info.id = string_printf("CUDA_%s_%04x:%02x:%02x",
+ name,
+ (unsigned int)pci_location[0],
+ (unsigned int)pci_location[1],
+ (unsigned int)pci_location[2]);
+
/* if device has a kernel timeout, assume it is used for display */
if(cuDeviceGetAttribute(&attr, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, num) == CUDA_SUCCESS && attr == 1) {
info.description += " (Display)";
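
The id change above is what makes the device.h comment ("should stay fixed with changing hardware config") hold for CUDA: the identifier is now derived from the device name and its PCI location rather than the enumeration index. A small sketch with a hypothetical stand-alone helper and illustrative values:

#include <cstdio>
#include <string>

/* Hypothetical stand-alone version of the id construction above. */
static std::string stable_cuda_id(const char *name, int domain, int bus, int device)
{
	char buf[512];
	snprintf(buf, sizeof(buf), "CUDA_%s_%04x:%02x:%02x",
	         name, (unsigned)domain, (unsigned)bus, (unsigned)device);
	return std::string(buf);
}

/* Two identical cards get distinct, order-independent ids, e.g.
 *   stable_cuda_id("GeForce GTX 1080", 0, 1, 0) -> "CUDA_GeForce GTX 1080_0000:01:00"
 *   stable_cuda_id("GeForce GTX 1080", 0, 2, 0) -> "CUDA_GeForce GTX 1080_0000:02:00"
 * whereas the old "CUDA_%d" ids depended on the enumeration order. */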
diff --git a/intern/cycles/device/device_intern.h b/intern/cycles/device/device_intern.h
index 47584ae..de48764 100644
--- a/intern/cycles/device/device_intern.h
+++ b/intern/cycles/device/device_intern.h
@@ -33,7 +33,6 @@ void device_cpu_info(vector<DeviceInfo>& devices);
void device_opencl_info(vector<DeviceInfo>& devices);
void device_cuda_info(vector<DeviceInfo>& devices);
void device_network_info(vector<DeviceInfo>& devices);
-void device_multi_info(vector<DeviceInfo>& devices);
string device_cpu_capabilities(void);
string device_opencl_capabilities(void);
diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp
index ef25735..31b8006 100644
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -89,6 +89,14 @@ public:
return error_msg;
}
+ virtual bool show_samples() const
+ {
+ if(devices.size() > 1) {
+ return false;
+ }
+ return devices.front().device->show_samples();
+ }
+
bool load_kernels(const DeviceRequestedFeatures& requested_features)
{
foreach(SubDevice& sub, devices)
@@ -350,120 +358,5 @@ Device *device_multi_create(DeviceInfo& info, Stats &stats, bool background)
return new MultiDevice(info, stats, background);
}
-static bool device_multi_add(vector<DeviceInfo>& devices, DeviceType type, bool with_display, bool with_advanced_shading, const char *id_fmt, int num)
-{
- DeviceInfo info;
-
- /* create map to find duplicate descriptions */
- map<string, int> dupli_map;
- map<string, int>::iterator dt;
- int num_added = 0, num_display = 0;
-
- info.advanced_shading = with_advanced_shading;
- info.pack_images = false;
- info.has_bindless_textures = true;
-
- foreach(DeviceInfo& subinfo, devices) {
- if(subinfo.type == type) {
- if(subinfo.advanced_shading != info.advanced_shading)
- continue;
- if(subinfo.display_device) {
- if(with_display)
- num_display++;
- else
- continue;
- }
-
- string key = subinfo.description;
-
- if(dupli_map.find(key) == dupli_map.end())
- dupli_map[key] = 1;
- else
- dupli_map[key]++;
-
- info.multi_devices.push_back(subinfo);
- if(subinfo.display_device)
- info.display_device = true;
- info.pack_images = info.pack_images || subinfo.pack_images;
- info.has_bindless_textures = info.has_bindless_textures && subinfo.has_bindless_textures;
- num_added++;
- }
- }
-
- if(num_added <= 1 || (with_display && num_display == 0))
- return false;
-
- /* generate string */
- stringstream desc;
- vector<string> last_tokens;
- bool first = true;
-
- for(dt = dupli_map.begin(); dt != dupli_map.end(); dt++) {
- if(!first) desc << " + ";
- first = false;
-
- /* get name and count */
- string name = dt->first;
- int count = dt->second;
-
- /* strip common prefixes */
- vector<string> tokens;
- string_split(tokens, dt->first);
-
- if(tokens.size() > 1) {
- int i;
-
- for(i = 0; i < tokens.size() && i < last_tokens.size(); i++)
- if(tokens[i] != last_tokens[i])
- break;
-
- name = "";
- for(; i < tokens.size(); i++) {
- name += tokens[i];
- if(i != tokens.size() - 1)
- name += " ";
- }
- }
-
- last_tokens = tokens;
-
- /* add */
- if(count > 1)
- desc << name << " (" << count << "x)";
- else
- desc << name;
- }
-
- /* add info */
- info.type = DEVICE_MULTI;
- info.description = desc.str();
- info.id = string_printf(id_fmt, num);
- info.display_device = with_display;
- info.num = 0;
-
- if(with_display)
- devices.push_back(info);
- else
- devices.insert(devices.begin(), info);
-
- return true;
-}
-
-void device_multi_info(vector<DeviceInfo>& devices)
-{
- int num = 0;
-
- if(!device_multi_add(devices, DEVICE_CUDA, false, true, "CUDA_MULTI_%d", num++))
- device_multi_add(devices, DEVICE_CUDA, false, false, "CUDA_MULTI_%d", num++);
- if(!device_multi_add(devices, DEVICE_CUDA, true, true, "CUDA_MULTI_%d", num++))
- device_multi_add(devices, DEVICE_CUDA, true, false, "CUDA_MULTI_%d", num++);
-
- num = 0;
- if(!device_multi_add(devices, DEVICE_OPENCL, false, true, "OPENCL_MULTI_%d", num++))
- device_multi_add(devices, DEVICE_OPENCL, false, false, "OPENCL_MULTI_%d", num++);
- if(!device_multi_add(devices, DEVICE_OPENCL, true, true, "OPENCL_MULTI_%d", num++))
- device_multi_add(devices, DEVICE_OPENCL, true, false, "OPENCL_MULTI_%d", num++);
-}
-
CCL_NAMESPACE_END
diff --git a/intern/cycles/device/device_network.cpp b/intern/cycles/device/device_network.cpp
index 3eb5ad2..53eef6c 100644
--- a/intern/cycles/device/device_network.cpp
+++ b/intern/cycles/device/device_network.cpp
@@ -51,6 +51,11 @@ public:
thread_mutex rpc_lock;
+ virtual bool show_samples() const
+ {
+ return false;
+ }
+
NetworkDevice(DeviceInfo& info, Stats &stats, const char *address)
: Device(info, stats, true), socket(io_service)
{
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index dce1d37..ba94c59 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -16,3275 +16,29 @@
#ifdef WITH_OPENCL
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
+#include "opencl/opencl.h"
-#include "clew.h"
-
-#include "device.h"
-#include "device_intern.h"
-
-#include "buffers.h"
-
-#include "util_debug.h"
-#include "util_foreach.h"
-#include "util_logging.h"
-#include "util_map.h"
-#include "util_math.h"
-#include "util_md5.h"
-#include "util_opengl.h"
-#include "util_path.h"
-#include "util_time.h"
-
-CCL_NAMESPACE_BEGIN
-
-#define CL_MEM_PTR(p) ((cl_mem)(uintptr_t)(p))
-
-/* Macro declarations used with split kernel */
-
-/* Macro to enable/disable work-stealing */
-#define __WORK_STEALING__
-
-#define SPLIT_KERNEL_LOCAL_SIZE_X 64
-#define SPLIT_KERNEL_LOCAL_SIZE_Y 1
-
-/* This value may be tuned according to the scene we are rendering.
- *
- * Modifying PATH_ITER_INC_FACTOR value proportional to number of expected
- * ray-bounces will improve performance.
- */
-#define PATH_ITER_INC_FACTOR 8
-
-/* When allocate global memory in chunks. We may not be able to
- * allocate exactly "CL_DEVICE_MAX_MEM_ALLOC_SIZE" bytes in chunks;
- * Since some bytes may be needed for aligning chunks of memory;
- * This is the amount of memory that we dedicate for that purpose.
- */
-#define DATA_ALLOCATION_MEM_FACTOR 5000000 //5MB
-
-struct OpenCLPlatformDevice {
- OpenCLPlatformDevice(cl_platform_id platform_id,
- const string& platform_name,
- cl_device_id device_id,
- cl_device_type device_type,
- const string& device_name)
- : platform_id(platform_id),
- platform_name(platform_name),
- device_id(device_id),
- device_type(device_type),
- device_name(device_name) {}
- cl_platform_id platform_id;
- string platform_name;
- cl_device_id device_id;
- cl_device_type device_type;
- string device_name;
-};
-
-namespace {
-
-cl_device_type opencl_device_type()
-{
- switch(DebugFlags().opencl.device_type)
- {
- case DebugFlags::OpenCL::DEVICE_NONE:
- return 0;
- case DebugFlags::OpenCL::DEVICE_ALL:
- return CL_DEVICE_TYPE_ALL;
- case DebugFlags::OpenCL::DEVICE_DEFAULT:
- return CL_DEVICE_TYPE_DEFAULT;
- case DebugFlags::OpenCL::DEVICE_CPU:
- return CL_DEVICE_TYPE_CPU;
- case DebugFlags::OpenCL::DEVICE_GPU:
- return CL_DEVICE_TYPE_GPU;
- case DebugFlags::OpenCL::DEVICE_ACCELERATOR:
- return CL_DEVICE_TYPE_ACCELERATOR;
- default:
- return CL_DEVICE_TYPE_ALL;
- }
-}
-
-inline bool opencl_kernel_use_debug()
-{
- return DebugFlags().opencl.debug;
-}
-
-bool opencl_kernel_use_advanced_shading(const string& platform)
-{
- /* keep this in sync with kernel_types.h! */
- if(platform == "NVIDIA CUDA")
- return true;
- else if(platform == "Apple")
- return true;
- else if(platform == "AMD Accelerated Parallel Processing")
- return true;
- else if(platform == "Intel(R) OpenCL")
- return true;
- /* Make sure officially unsupported OpenCL platforms
- * does not set up to use advanced shading.
- */
- return false;
-}
-
-bool opencl_kernel_use_split(const string& platform_name,
- const cl_device_type device_type)
-{
- if(DebugFlags().opencl.kernel_type == DebugFlags::OpenCL::KERNEL_SPLIT) {
- VLOG(1) << "Forcing split kernel to use.";
- return true;
- }
- if(DebugFlags().opencl.kernel_type == DebugFlags::OpenCL::KERNEL_MEGA) {
- VLOG(1) << "Forcing mega kernel to use.";
- return false;
- }
- /* TODO(sergey): Replace string lookups with more enum-like API,
- * similar to device/vendor checks blender's gpu.
- */
- if(platform_name == "AMD Accelerated Parallel Processing" &&
- device_type == CL_DEVICE_TYPE_GPU)
- {
- return true;
- }
- return false;
-}
-
-bool opencl_device_supported(const string& platform_name,
- const cl_device_id device_id)
-{
- cl_device_type device_type;
- clGetDeviceInfo(device_id,
- CL_DEVICE_TYPE,
- sizeof(cl_device_type),
- &device_type,
- NULL);
- if(platform_name == "AMD Accelerated Parallel Processing" &&
- device_type == CL_DEVICE_TYPE_GPU)
- {
- return true;
- }
- if(platform_name == "Apple" && device_type == CL_DEVICE_TYPE_GPU) {
- return true;
- }
- return false;
-}
-
-bool opencl_platform_version_check(cl_platform_id platform,
- string *error = NULL)
-{
- const int req_major = 1, req_minor = 1;
- int major, minor;
- char version[256];
- clGetPlatformInfo(platform,
- CL_PLATFORM_VERSION,
- sizeof(version),
- &version,
- NULL);
- if(sscanf(version, "OpenCL %d.%d", &major, &minor) < 2) {
- if(error != NULL) {
- *error = string_printf("OpenCL: failed to parse platform version string (%s).", version);
- }
- return false;
- }
- if(!((major == req_major && minor >= req_minor) || (major > req_major))) {
- if(error != NULL) {
- *error = string_printf("OpenCL: platform version 1.1 or later required, found %d.%d", major, minor);
- }
- return false;
- }
- if(error != NULL) {
- *error = "";
- }
- return true;
-}
-
-bool opencl_device_version_check(cl_device_id device,
- string *error = NULL)
-{
- const int req_major = 1, req_minor = 1;
- int major, minor;
- char version[256];
- clGetDeviceInfo(device,
- CL_DEVICE_OPENCL_C_VERSION,
- sizeof(version),
- &version,
- NULL);
- if(sscanf(version, "OpenCL C %d.%d", &major, &minor) < 2) {
- if(error != NULL) {
- *error = string_printf("OpenCL: failed to parse OpenCL C version string (%s).", version);
- }
- return false;
- }
- if(!((major == req_major && minor >= req_minor) || (major > req_major))) {
- if(error != NULL) {
- *error = string_printf("OpenCL: C version 1.1 or later required, found %d.%d", major, minor);
- }
- return false;
- }
- if(error != NULL) {
- *error = "";
- }
- return true;
-}
-
-void opencl_get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices)
-{
- const bool force_all_platforms =
- (DebugFlags().opencl.kernel_type != DebugFlags::OpenCL::KERNEL_DEFAULT);
- const cl_device_type device_type = opencl_device_type();
- static bool first_time = true;
-#define FIRST_VLOG(severity) if(first_time) VLOG(severity)
-
- usable_devices->clear();
-
- if(device_type == 0) {
- FIRST_VLOG(2) << "OpenCL devices are forced to be disabled.";
- first_time = false;
- return;
- }
-
- vector<cl_device_id> device_ids;
- cl_uint num_devices = 0;
- vector<cl_platform_id> platform_ids;
- cl_uint num_platforms = 0;
-
- /* Get devices. */
- if(clGetPlatformIDs(0, NULL, &num_platforms) != CL_SUCCESS ||
- num_platforms == 0)
- {
- FIRST_VLOG(2) << "No OpenCL platforms were found.";
- first_time = false;
- return;
- }
- platform_ids.resize(num_platforms);
- if(clGetPlatformIDs(num_platforms, &platform_ids[0], NULL) != CL_SUCCESS) {
- FIRST_VLOG(2) << "Failed to fetch platform IDs from the driver..";
- first_time = false;
- return;
- }
- /* Devices are numbered consecutively across platforms. */
- for(int platform = 0; platform < num_platforms; platform++) {
- cl_platform_id platform_id = platform_ids[platform];
- char pname[256];
- if(clGetPlatformInfo(platform_id,
- CL_PLATFORM_NAME,
- sizeof(pname),
- &pname,
- NULL) != CL_SUCCESS)
- {
- FIRST_VLOG(2) << "Failed to get platform name, ignoring.";
- continue;
- }
- string platform_name = pname;
- FIRST_VLOG(2) << "Enumerating devices for platform "
- << platform_name << ".";
- if(!opencl_platform_version_check(platform_id)) {
- FIRST_VLOG(2) << "Ignoring platform " << platform_name
- << " due to too old compiler version.";
- continue;
- }
- num_devices = 0;
- if(clGetDeviceIDs(platform_id,
- device_type,
- 0,
- NULL,
- &num_devices) != CL_SUCCESS || num_devices == 0)
- {
- FIRST_VLOG(2) << "Ignoring platform " << platform_name
- << ", failed to fetch number of devices.";
- continue;
- }
- device_ids.resize(num_devices);
- if(clGetDeviceIDs(platform_id,
- device_type,
- num_devices,
- &device_ids[0],
- NULL) != CL_SUCCESS)
- {
- FIRST_VLOG(2) << "Ignoring platform " << platform_name
- << ", failed to fetch devices list.";
- continue;
- }
- for(int num = 0; num < num_devices; num++) {
- cl_device_id device_id = device_ids[num];
- char device_name[1024] = "\0";
- if(clGetDeviceInfo(device_id,
- CL_DEVICE_NAME,
- sizeof(device_name),
- &device_name,
- NULL) != CL_SUCCESS)
- {
- FIRST_VLOG(2) << "Failed to fetch device name, ignoring.";
- continue;
- }
- if(!opencl_device_version_check(device_id)) {
- FIRST_VLOG(2) << "Ignoring device " << device_name
- << " due to old compiler version.";
- continue;
- }
- if(force_all_platforms ||
- opencl_device_supported(platform_name, device_id))
- {
- cl_device_type device_type;
- if(clGetDeviceInfo(device_id,
- CL_DEVICE_TYPE,
- sizeof(cl_device_type),
- &device_type,
- NULL) != CL_SUCCESS)
- {
- FIRST_VLOG(2) << "Ignoring device " << device_name
- << ", failed to fetch device type.";
- continue;
- }
- FIRST_VLOG(2) << "Adding new device " << device_name << ".";
- usable_devices->push_back(OpenCLPlatformDevice(platform_id,
- platform_name,
- device_id,
- device_type,
- device_name));
- }
- else {
- FIRST_VLOG(2) << "Ignoring device " << device_name
- << ", not officially supported yet.";
- }
- }
- }
- first_time = false;
-}
-
-} /* namespace */
-
-/* Thread safe cache for contexts and programs.
- *
- * TODO(sergey): Make it more generous, so it can contain any type of program
- * without hardcoding possible program types in the slot.
- */
-class OpenCLCache
-{
- struct Slot
- {
- thread_mutex *mutex;
- cl_context context;
- /* cl_program for shader, bake, film_convert kernels (used in OpenCLDeviceBase) */
- cl_program ocl_dev_base_program;
- /* cl_program for megakernel (used in OpenCLDeviceMegaKernel) */
- cl_program ocl_dev_megakernel_program;
-
- Slot() : mutex(NULL),
- context(NULL),
- ocl_dev_base_program(NULL),
- ocl_dev_megakernel_program(NULL) {}
-
- Slot(const Slot& rhs)
- : mutex(rhs.mutex),
- context(rhs.context),
- ocl_dev_base_program(rhs.ocl_dev_base_program),
- ocl_dev_megakernel_program(rhs.ocl_dev_megakernel_program)
- {
- /* copy can only happen in map insert, assert that */
- assert(mutex == NULL);
- }
-
- ~Slot()
- {
- delete mutex;
- mutex = NULL;
- }
- };
-
- /* key is combination of platform ID and device ID */
- typedef pair<cl_platform_id, cl_device_id> PlatformDevicePair;
-
- /* map of Slot objects */
- typedef map<PlatformDevicePair, Slot> CacheMap;
- CacheMap cache;
-
- thread_mutex cache_lock;
-
- /* lazy instantiate */
- static OpenCLCache &global_instance()
- {
- static OpenCLCache instance;
- return instance;
- }
-
- OpenCLCache()
- {
- }
-
- ~OpenCLCache()
- {
- /* Intel OpenCL bug raises SIGABRT due to pure virtual call
- * so this is disabled. It's not necessary to free objects
- * at process exit anyway.
- * http://software.intel.com/en-us/forums/topic/370083#comments */
-
- //flush();
- }
-
- /* lookup something in the cache. If this returns NULL, slot_locker
- * will be holding a lock for the cache. slot_locker should refer to a
- * default constructed thread_scoped_lock */
- template<typename T>
- static T get_something(cl_platform_id platform,
- cl_device_id device,
- T Slot::*member,
- thread_scoped_lock& slot_locker)
- {
- assert(platform != NULL);
-
- OpenCLCache& self = global_instance();
-
- thread_scoped_lock cache_lock(self.cache_lock);
-
- pair<CacheMap::iterator,bool> ins = self.cache.insert(
- CacheMap::value_type(PlatformDevicePair(platform, device), Slot()));
-
- Slot &slot = ins.first->second;
-
- /* create slot lock only while holding cache lock */
- if(!slot.mutex)
- slot.mutex = new thread_mutex;
-
- /* need to unlock cache before locking slot, to allow store to complete */
- cache_lock.unlock();
-
- /* lock the slot */
- slot_locker = thread_scoped_lock(*slot.mutex);
-
- /* If the thing isn't cached */
- if(slot.*member == NULL) {
- /* return with the caller's lock holder holding the slot lock */
- return NULL;
- }
-
- /* the item was already cached, release the slot lock */
- slot_locker.unlock();
-
- return slot.*member;
- }
-
- /* store something in the cache. you MUST have tried to get the item before storing to it */
- template<typename T>
- static void store_something(cl_platform_id platform,
- cl_device_id device,
- T thing,
- T Slot::*member,
- thread_scoped_lock& slot_locker)
- {
- assert(platform != NULL);
- assert(device != NULL);
- assert(thing != NULL);
-
- OpenCLCache &self = global_instance();
-
- thread_scoped_lock cache_lock(self.cache_lock);
- CacheMap::iterator i = self.cache.find(PlatformDevicePair(platform, device));
- cache_lock.unlock();
-
- Slot &slot = i->second;
-
- /* sanity check */
- assert(i != self.cache.end());
- assert(slot.*member == NULL);
-
- slot.*member = thing;
-
- /* unlock the slot */
- slot_locker.unlock();
- }
-
-public:
-
- enum ProgramName {
- OCL_DEV_BASE_PROGRAM,
- OCL_DEV_MEGAKERNEL_PROGRAM,
- };
-
- /* see get_something comment */
- static cl_context get_context(cl_platform_id platform,
- cl_device_id device,
- thread_scoped_lock& slot_locker)
- {
- cl_context context = get_something<cl_context>(platform,
- device,
- &Slot::context,
- slot_locker);
-
- if(!context)
- return NULL;
-
- /* caller is going to release it when done with it, so retain it */
- cl_int ciErr = clRetainContext(context);
- assert(ciErr == CL_SUCCESS);
- (void)ciErr;
-
- return context;
- }
-
- /* see get_something comment */
- static cl_program get_program(cl_platform_id platform,
- cl_device_id device,
- ProgramName program_name,
- thread_scoped_lock& slot_locker)
- {
- cl_program program = NULL;
-
- switch(program_name) {
- case OCL_DEV_BASE_PROGRAM:
- /* Get program related to OpenCLDeviceBase */
- program = get_something<cl_program>(platform,
- device,
- &Slot::ocl_dev_base_program,
- slot_locker);
- break;
- case OCL_DEV_MEGAKERNEL_PROGRAM:
- /* Get program related to megakernel */
- program = get_something<cl_program>(platform,
- device,
- &Slot::ocl_dev_megakernel_program,
- slot_locker);
- break;
- default:
- assert(!"Invalid program name");
- }
-
- if(!program)
- return NULL;
-
- /* caller is going to release it when done with it, so retain it */
- cl_int ciErr = clRetainProgram(program);
- assert(ciErr == CL_SUCCESS);
- (void)ciErr;
-
- return program;
- }
-
- /* see store_something comment */
- static void store_context(cl_platform_id platform,
- cl_device_id device,
- cl_context context,
- thread_scoped_lock& slot_locker)
- {
- store_something<cl_context>(platform,
- device,
- context,
- &Slot::context,
- slot_locker);
-
- /* increment reference count in OpenCL.
- * The caller is going to release the object when done with it. */
- cl_int ciErr = clRetainContext(context);
- assert(ciErr == CL_SUCCESS);
- (void)ciErr;
- }
-
- /* see store_something comment */
- static void store_program(cl_platform_id platform,
- cl_device_id device,
- cl_program program,
- ProgramName program_name,
- thread_scoped_lock& slot_locker)
- {
- switch(program_name) {
- case OCL_DEV_BASE_PROGRAM:
- store_something<cl_program>(platform,
- device,
- program,
- &Slot::ocl_dev_base_program,
- slot_locker);
- break;
- case OCL_DEV_MEGAKERNEL_PROGRAM:
- store_something<cl_program>(platform,
- device,
- program,
- &Slot::ocl_dev_megakernel_program,
- slot_locker);
- break;
- default:
- assert(!"Invalid program name\n");
- return;
- }
-
- /* Increment reference count in OpenCL.
- * The caller is going to release the object when done with it.
- */
- cl_int ciErr = clRetainProgram(program);
- assert(ciErr == CL_SUCCESS);
- (void)ciErr;
- }
-
- /* Discard all cached contexts and programs. */
- static void flush()
- {
- OpenCLCache &self = global_instance();
- thread_scoped_lock cache_lock(self.cache_lock);
-
- foreach(CacheMap::value_type &item, self.cache) {
- if(item.second.ocl_dev_base_program != NULL)
- clReleaseProgram(item.second.ocl_dev_base_program);
- if(item.second.ocl_dev_megakernel_program != NULL)
- clReleaseProgram(item.second.ocl_dev_megakernel_program);
- if(item.second.context != NULL)
- clReleaseContext(item.second.context);
- }
-
- self.cache.clear();
- }
-};
-
-class OpenCLDeviceBase : public Device
-{
-public:
- DedicatedTaskPool task_pool;
- cl_context cxContext;
- cl_command_queue cqCommandQueue;
- cl_platform_id cpPlatform;
- cl_device_id cdDevice;
- cl_program cpProgram;
- cl_kernel ckFilmConvertByteKernel;
- cl_kernel ckFilmConvertHalfFloatKernel;
- cl_kernel ckShaderKernel;
- cl_kernel ckBakeKernel;
- cl_int ciErr;
-
- typedef map<string, device_vector<uchar>*> ConstMemMap;
- typedef map<string, device_ptr> MemMap;
-
- ConstMemMap const_mem_map;
- MemMap mem_map;
- device_ptr null_mem;
-
- bool device_initialized;
- string platform_name;
-
- bool opencl_error(cl_int err)
- {
- if(err != CL_SUCCESS) {
- string message = string_printf("OpenCL error (%d): %s", err, clewErrorString(err));
- if(error_msg == "")
- error_msg = message;
- fprintf(stderr, "%s\n", message.c_str());
- return true;
- }
-
- return false;
- }
-
- void opencl_error(const string& message)
- {
- if(error_msg == "")
- error_msg = message;
- fprintf(stderr, "%s\n", message.c_str());
- }
-
-#define opencl_assert(stmt) \
- { \
- cl_int err = stmt; \
- \
- if(err != CL_SUCCESS) { \
- string message = string_printf("OpenCL error: %s in %s", clewErrorString(err), #stmt); \
- if(error_msg == "") \
- error_msg = message; \
- fprintf(stderr, "%s\n", message.c_str()); \
- } \
- } (void)0
-
- void opencl_assert_err(cl_int err, const char* where)
- {
- if(err != CL_SUCCESS) {
- string message = string_printf("OpenCL error (%d): %s in %s", err, clewErrorString(err), where);
- if(error_msg == "")
- error_msg = message;
- fprintf(stderr, "%s\n", message.c_str());
-#ifndef NDEBUG
- abort();
-#endif
- }
- }
-
- OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool background_)
- : Device(info, stats, background_)
- {
- cpPlatform = NULL;
- cdDevice = NULL;
- cxContext = NULL;
- cqCommandQueue = NULL;
- cpProgram = NULL;
- ckFilmConvertByteKernel = NULL;
- ckFilmConvertHalfFloatKernel = NULL;
- ckShaderKernel = NULL;
- ckBakeKernel = NULL;
- null_mem = 0;
- device_initialized = false;
-
- vector<OpenCLPlatformDevice> usable_devices;
- opencl_get_usable_devices(&usable_devices);
- if(usable_devices.size() == 0) {
- opencl_error("OpenCL: no devices found.");
- return;
- }
- assert(info.num < usable_devices.size());
- OpenCLPlatformDevice& platform_device = usable_devices[info.num];
- cpPlatform = platform_device.platform_id;
- cdDevice = platform_device.device_id;
- platform_name = platform_device.platform_name;
- VLOG(2) << "Creating new Cycles device for OpenCL platform "
- << platform_name << ", device "
- << platform_device.device_name << ".";
-
- {
- /* try to use cached context */
- thread_scoped_lock cache_locker;
- cxContext = OpenCLCache::get_context(cpPlatform, cdDevice, cache_locker);
-
- if(cxContext == NULL) {
- /* create context properties array to specify platform */
- const cl_context_properties context_props[] = {
- CL_CONTEXT_PLATFORM, (cl_context_properties)cpPlatform,
- 0, 0
- };
-
- /* create context */
- cxContext = clCreateContext(context_props, 1, &cdDevice,
- context_notify_callback, cdDevice, &ciErr);
-
- if(opencl_error(ciErr)) {
- opencl_error("OpenCL: clCreateContext failed");
- return;
- }
-
- /* cache it */
- OpenCLCache::store_context(cpPlatform, cdDevice, cxContext, cache_locker);
- }
- }
-
- cqCommandQueue = clCreateCommandQueue(cxContext, cdDevice, 0, &ciErr);
- if(opencl_error(ciErr))
- return;
-
- null_mem = (device_ptr)clCreateBuffer(cxContext, CL_MEM_READ_ONLY, 1, NULL, &ciErr);
- if(opencl_error(ciErr))
- return;
-
- fprintf(stderr, "Device init success\n");
- device_initialized = true;
- }
-
- static void CL_CALLBACK context_notify_callback(const char *err_info,
- const void * /*private_info*/, size_t /*cb*/, void *user_data)
- {
- char name[256];
- clGetDeviceInfo((cl_device_id)user_data, CL_DEVICE_NAME, sizeof(name), &name, NULL);
-
- fprintf(stderr, "OpenCL error (%s): %s\n", name, err_info);
- }
-
- bool opencl_version_check()
- {
- string error;
- if(!opencl_platform_version_check(cpPlatform, &error)) {
- opencl_error(error);
- return false;
- }
- if(!opencl_device_version_check(cdDevice, &error)) {
- opencl_error(error);
- return false;
- }
- return true;
- }
-
- bool load_binary(const string& /*kernel_path*/,
- const string& clbin,
- const string& custom_kernel_build_options,
- cl_program *program,
- const string *debug_src = NULL)
- {
- /* read binary into memory */
- vector<uint8_t> binary;
-
- if(!path_read_binary(clbin, binary)) {
- opencl_error(string_printf("OpenCL failed to read cached binary %s.", clbin.c_str()));
- return false;
- }
-
- /* create program */
- cl_int status;
- size_t size = binary.size();
- const uint8_t *bytes = &binary[0];
-
- *program = clCreateProgramWithBinary(cxContext, 1, &cdDevice,
- &size, &bytes, &status, &ciErr);
-
- if(opencl_error(status) || opencl_error(ciErr)) {
- opencl_error(string_printf("OpenCL failed create program from cached binary %s.", clbin.c_str()));
- return false;
- }
-
- if(!build_kernel(program, custom_kernel_build_options, debug_src))
- return false;
-
- return true;
- }
-
- bool save_binary(cl_program *program, const string& clbin)
- {
- size_t size = 0;
- clGetProgramInfo(*program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL);
-
- if(!size)
- return false;
-
- vector<uint8_t> binary(size);
- uint8_t *bytes = &binary[0];
-
- clGetProgramInfo(*program, CL_PROGRAM_BINARIES, sizeof(uint8_t*), &bytes, NULL);
-
- if(!path_write_binary(clbin, binary)) {
- opencl_error(string_printf("OpenCL failed to write cached binary %s.", clbin.c_str()));
- return false;
- }
-
- return true;
- }
-
- bool build_kernel(cl_program *kernel_program,
- const string& custom_kernel_build_options,
- const string *debug_src = NULL)
- {
- string build_options;
- build_options = kernel_build_options(debug_src) + custom_kernel_build_options;
-
- ciErr = clBuildProgram(*kernel_program, 0, NULL, build_options.c_str(), NULL, NULL);
-
- /* show warnings even if build is successful */
- size_t ret_val_size = 0;
-
- clGetProgramBuildInfo(*kernel_program, cdDevice, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
-
- if(ret_val_size > 1) {
- vector<char> build_log(ret_val_size + 1);
- clGetProgramBuildInfo(*kernel_program, cdDevice, CL_PROGRAM_BUILD_LOG, ret_val_size, &build_log[0], NULL);
-
- build_log[ret_val_size] = '\0';
- /* Skip meaningless empty output from the NVidia compiler. */
- if(!(ret_val_size == 2 && build_log[0] == '\n')) {
- fprintf(stderr, "OpenCL kernel build output:\n");
- fprintf(stderr, "%s\n", &build_log[0]);
- }
- }
-
- if(ciErr != CL_SUCCESS) {
- opencl_error("OpenCL build failed: errors in console");
- fprintf(stderr, "Build error: %s\n", clewErrorString(ciErr));
- return false;
- }
-
- return true;
- }
-
- bool compile_kernel(const string& kernel_name,
- const string& kernel_path,
- const string& source,
- const string& custom_kernel_build_options,
- cl_program *kernel_program,
- const string *debug_src = NULL)
- {
- /* We compile kernels consisting of many files. unfortunately OpenCL
- * kernel caches do not seem to recognize changes in included files.
- * so we force recompile on changes by adding the md5 hash of all files.
- */
- string inlined_source = path_source_replace_includes(source,
- kernel_path);
-
- if(debug_src) {
- path_write_text(*debug_src, inlined_source);
- }
-
- size_t source_len = inlined_source.size();
- const char *source_str = inlined_source.c_str();
-
- *kernel_program = clCreateProgramWithSource(cxContext,
- 1,
- &source_str,
- &source_len,
- &ciErr);
-
- if(opencl_error(ciErr)) {
- return false;
- }
-
- double starttime = time_dt();
- printf("Compiling %s OpenCL kernel ...\n", kernel_name.c_str());
- /* TODO(sergey): Report which kernel is being compiled
- * as well (megakernel or which of split kernels etc..).
- */
- printf("Build flags: %s\n", custom_kernel_build_options.c_str());
-
- if(!build_kernel(kernel_program, custom_kernel_build_options, debug_src))
- return false;
-
- printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime);
-
- return true;
- }
-
- string device_md5_hash(string kernel_custom_build_options = "")
- {
- MD5Hash md5;
- char version[256], driver[256], name[256], vendor[256];
-
- clGetPlatformInfo(cpPlatform, CL_PLATFORM_VENDOR, sizeof(vendor), &vendor, NULL);
- clGetDeviceInfo(cdDevice, CL_DEVICE_VERSION, sizeof(version), &version, NULL);
- clGetDeviceInfo(cdDevice, CL_DEVICE_NAME, sizeof(name), &name, NULL);
- clGetDeviceInfo(cdDevice, CL_DRIVER_VERSION, sizeof(driver), &driver, NULL);
-
- md5.append((uint8_t*)vendor, strlen(vendor));
- md5.append((uint8_t*)version, strlen(version));
- md5.append((uint8_t*)name, strlen(name));
- md5.append((uint8_t*)driver, strlen(driver));
-
- string options = kernel_build_options();
- options += kernel_custom_build_options;
- md5.append((uint8_t*)options.c_str(), options.size());
-
- return md5.get_hex();
- }
-
- bool load_kernels(const DeviceRequestedFeatures& requested_features)
- {
- /* Verify if device was initialized. */
- if(!device_initialized) {
- fprintf(stderr, "OpenCL: failed to initialize device.\n");
- return false;
- }
-
- /* Try to use cached kernel. */
- thread_scoped_lock cache_locker;
- cpProgram = load_cached_kernel(requested_features,
- OpenCLCache::OCL_DEV_BASE_PROGRAM,
- cache_locker);
-
- if(!cpProgram) {
- VLOG(2) << "No cached OpenCL kernel.";
-
- /* Verify we have right opencl version. */
- if(!opencl_version_check())
- return false;
-
- string build_flags = build_options_for_base_program(requested_features);
-
- /* Calculate md5 hashes to detect changes. */
- string kernel_path = path_get("kernel");
- string kernel_md5 = path_files_md5_hash(kernel_path);
- string device_md5 = device_md5_hash(build_flags);
-
- /* Path to cached binary.
- *
- * TODO(sergey): Seems we could de-duplicate all this string_printf()
- * calls with some utility function which will give file name for a
- * given hashes..
- */
- string clbin = string_printf("cycles_kernel_%s_%s.clbin",
- device_md5.c_str(),
- kernel_md5.c_str());
- clbin = path_user_get(path_join("cache", clbin));
-
- /* path to preprocessed source for debugging */
- string clsrc, *debug_src = NULL;
-
- if(opencl_kernel_use_debug()) {
- clsrc = string_printf("cycles_kernel_%s_%s.cl",
- device_md5.c_str(),
- kernel_md5.c_str());
- clsrc = path_user_get(path_join("cache", clsrc));
- debug_src = &clsrc;
- }
-
- /* If binary kernel exists already, try use it. */
- if(path_exists(clbin) && load_binary(kernel_path,
- clbin,
- build_flags,
- &cpProgram))
- {
- /* Kernel loaded from binary, nothing to do. */
- VLOG(2) << "Loaded kernel from " << clbin << ".";
- }
- else {
- VLOG(2) << "Kernel file " << clbin << " either doesn't exist or failed to be loaded by driver.";
- string init_kernel_source = "#include \"kernels/opencl/kernel.cl\" // " + kernel_md5 + "\n";
-
- /* If does not exist or loading binary failed, compile kernel. */
- if(!compile_kernel("base_kernel",
- kernel_path,
- init_kernel_source,
- build_flags,
- &cpProgram,
- debug_src))
- {
- return false;
- }
-
- /* Save binary for reuse. */
- if(!save_binary(&cpProgram, clbin)) {
- return false;
- }
- }
-
- /* Cache the program. */
- store_cached_kernel(cpPlatform,
- cdDevice,
- cpProgram,
- OpenCLCache::OCL_DEV_BASE_PROGRAM,
- cache_locker);
- }
- else {
- VLOG(2) << "Found cached OpenCL kernel.";
- }
-
- /* Find kernels. */
-#define FIND_KERNEL(kernel_var, kernel_name) \
- do { \
- kernel_var = clCreateKernel(cpProgram, "kernel_ocl_" kernel_name, &ciErr); \
- if(opencl_error(ciErr)) \
- return false; \
- } while(0)
-
- FIND_KERNEL(ckFilmConvertByteKernel, "convert_to_byte");
- FIND_KERNEL(ckFilmConvertHalfFloatKernel, "convert_to_half_float");
- FIND_KERNEL(ckShaderKernel, "shader");
- FIND_KERNEL(ckBakeKernel, "bake");
-
-#undef FIND_KERNEL
- return true;
- }
-
- ~OpenCLDeviceBase()
- {
- task_pool.stop();
-
- if(null_mem)
- clReleaseMemObject(CL_MEM_PTR(null_mem));
-
- ConstMemMap::iterator mt;
- for(mt = const_mem_map.begin(); mt != const_mem_map.end(); mt++) {
- mem_free(*(mt->second));
- delete mt->second;
- }
-
- if(ckFilmConvertByteKernel)
- clReleaseKernel(ckFilmConvertByteKernel);
- if(ckFilmConvertHalfFloatKernel)
- clReleaseKernel(ckFilmConvertHalfFloatKernel);
- if(ckShaderKernel)
- clReleaseKernel(ckShaderKernel);
- if(ckBakeKernel)
- clReleaseKernel(ckBakeKernel);
- if(cpProgram)
- clReleaseProgram(cpProgram);
- if(cqCommandQueue)
- clReleaseCommandQueue(cqCommandQueue);
- if(cxContext)
- clReleaseContext(cxContext);
- }
-
- void mem_alloc(device_memory& mem, MemoryType type)
- {
- size_t size = mem.memory_size();
-
- cl_mem_flags mem_flag;
- void *mem_ptr = NULL;
-
- if(type == MEM_READ_ONLY)
- mem_flag = CL_MEM_READ_ONLY;
- else if(type == MEM_WRITE_ONLY)
- mem_flag = CL_MEM_WRITE_ONLY;
- else
- mem_flag = CL_MEM_READ_WRITE;
-
- /* Zero-size allocation might be invoked by render, but not really
- * supported by OpenCL. Using NULL as device pointer also doesn't really
- * work for some reason, so for the time being we'll use special case
- * will null_mem buffer.
- */
- if(size != 0) {
- mem.device_pointer = (device_ptr)clCreateBuffer(cxContext,
- mem_flag,
- size,
- mem_ptr,
- &ciErr);
- opencl_assert_err(ciErr, "clCreateBuffer");
- }
- else {
- mem.device_pointer = null_mem;
- }
-
- stats.mem_alloc(size);
- mem.device_size = size;
- }
-
- void mem_copy_to(device_memory& mem)
- {
- /* this is blocking */
- size_t size = mem.memory_size();
- if(size != 0) {
- opencl_assert(clEnqueueWriteBuffer(cqCommandQueue,
- CL_MEM_PTR(mem.device_pointer),
- CL_TRUE,
- 0,
- size,
- (void*)mem.data_pointer,
- 0,
- NULL, NULL));
- }
- }
-
- void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
- {
- size_t offset = elem*y*w;
- size_t size = elem*w*h;
- assert(size != 0);
- opencl_assert(clEnqueueReadBuffer(cqCommandQueue,
- CL_MEM_PTR(mem.device_pointer),
- CL_TRUE,
- offset,
- size,
- (uchar*)mem.data_pointer + offset,
- 0,
- NULL, NULL));
- }
-
- void mem_zero(device_memory& mem)
- {
- if(mem.device_pointer) {
- memset((void*)mem.data_pointer, 0, mem.memory_size());
- mem_copy_to(mem);
- }
- }
-
- void mem_free(device_memory& mem)
- {
- if(mem.device_pointer) {
- if(mem.device_pointer != null_mem) {
- opencl_assert(clReleaseMemObject(CL_MEM_PTR(mem.device_pointer)));
- }
- mem.device_pointer = 0;
-
- stats.mem_free(mem.device_size);
- mem.device_size = 0;
- }
- }
-
- void const_copy_to(const char *name, void *host, size_t size)
- {
- ConstMemMap::iterator i = const_mem_map.find(name);
-
- if(i == const_mem_map.end()) {
- device_vector<uchar> *data = new device_vector<uchar>();
- data->copy((uchar*)host, size);
-
- mem_alloc(*data, MEM_READ_ONLY);
- i = const_mem_map.insert(ConstMemMap::value_type(name, data)).first;
- }
- else {
- device_vector<uchar> *data = i->second;
- data->copy((uchar*)host, size);
- }
-
- mem_copy_to(*i->second);
- }
-
- void tex_alloc(const char *name,
- device_memory& mem,
- InterpolationType /*interpolation*/,
- ExtensionType /*extension*/)
- {
- VLOG(1) << "Texture allocate: " << name << ", "
- << string_human_readable_number(mem.memory_size()) << " bytes. ("
- << string_human_readable_size(mem.memory_size()) << ")";
- mem_alloc(mem, MEM_READ_ONLY);
- mem_copy_to(mem);
- assert(mem_map.find(name) == mem_map.end());
- mem_map.insert(MemMap::value_type(name, mem.device_pointer));
- }
-
- void tex_free(device_memory& mem)
- {
- if(mem.device_pointer) {
- foreach(const MemMap::value_type& value, mem_map) {
- if(value.second == mem.device_pointer) {
- mem_map.erase(value.first);
- break;
- }
- }
-
- mem_free(mem);
- }
- }
-
- size_t global_size_round_up(int group_size, int global_size)
- {
- int r = global_size % group_size;
- return global_size + ((r == 0)? 0: group_size - r);
- }
-
- void enqueue_kernel(cl_kernel kernel, size_t w, size_t h)
- {
- size_t workgroup_size, max_work_items[3];
-
- clGetKernelWorkGroupInfo(kernel, cdDevice,
- CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &workgroup_size, NULL);
- clGetDeviceInfo(cdDevice,
- CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t)*3, max_work_items, NULL);
-
- /* Try to divide evenly over 2 dimensions. */
- size_t sqrt_workgroup_size = max((size_t)sqrt((double)workgroup_size), 1);
- size_t local_size[2] = {sqrt_workgroup_size, sqrt_workgroup_size};
-
- /* Some implementations have max size 1 on 2nd dimension. */
- if(local_size[1] > max_work_items[1]) {
- local_size[0] = workgroup_size/max_work_items[1];
- local_size[1] = max_work_items[1];
- }
-
- size_t global_size[2] = {global_size_round_up(local_size[0], w),
- global_size_round_up(local_size[1], h)};
-
- /* Vertical size of 1 is coming from bake/shade kernels where we should
- * not round anything up because otherwise we'll either be doing too
- * much work per pixel (if we don't check global ID on Y axis) or will
- * be checking for global ID to always have Y of 0.
- */
- if(h == 1) {
- global_size[h] = 1;
- }
-
- /* run kernel */
- opencl_assert(clEnqueueNDRangeKernel(cqCommandQueue, kernel, 2, NULL, global_size, NULL, 0, NULL, NULL));
- opencl_assert(clFlush(cqCommandQueue));
- }
-
- void set_kernel_arg_mem(cl_kernel kernel, cl_uint *narg, const char *name)
- {
- cl_mem ptr;
-
- MemMap::iterator i = mem_map.find(name);
- if(i != mem_map.end()) {
- ptr = CL_MEM_PTR(i->second);
- }
- else {
- /* work around NULL not working, even though the spec says otherwise */
- ptr = CL_MEM_PTR(null_mem);
- }
-
- opencl_assert(clSetKernelArg(kernel, (*narg)++, sizeof(ptr), (void*)&ptr));
- }
-
- void film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half)
- {
- /* cast arguments to cl types */
- cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
- cl_mem d_rgba = (rgba_byte)? CL_MEM_PTR(rgba_byte): CL_MEM_PTR(rgba_half);
- cl_mem d_buffer = CL_MEM_PTR(buffer);
- cl_int d_x = task.x;
- cl_int d_y = task.y;
- cl_int d_w = task.w;
- cl_int d_h = task.h;
- cl_float d_sample_scale = 1.0f/(task.sample + 1);
- cl_int d_offset = task.offset;
- cl_int d_stride = task.stride;
-
-
- cl_kernel ckFilmConvertKernel = (rgba_byte)? ckFilmConvertByteKernel: ckFilmConvertHalfFloatKernel;
-
- cl_uint start_arg_index =
- kernel_set_args(ckFilmConvertKernel,
- 0,
- d_data,
- d_rgba,
- d_buffer);
-
-#define KERNEL_TEX(type, ttype, name) \
- set_kernel_arg_mem(ckFilmConvertKernel, &start_arg_index, #name);
-#include "kernel_textures.h"
-#undef KERNEL_TEX
-
- start_arg_index += kernel_set_args(ckFilmConvertKernel,
- start_arg_index,
- d_sample_scale,
- d_x,
- d_y,
- d_w,
- d_h,
- d_offset,
- d_stride);
-
- enqueue_kernel(ckFilmConvertKernel, d_w, d_h);
- }
-
- void shader(DeviceTask& task)
- {
- /* cast arguments to cl types */
- cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
- cl_mem d_input = CL_MEM_PTR(task.shader_input);
- cl_mem d_output = CL_MEM_PTR(task.shader_output);
- cl_mem d_output_luma = CL_MEM_PTR(task.shader_output_luma);
- cl_int d_shader_eval_type = task.shader_eval_type;
- cl_int d_shader_filter = task.shader_filter;
- cl_int d_shader_x = task.shader_x;
- cl_int d_shader_w = task.shader_w;
- cl_int d_offset = task.offset;
-
- cl_kernel kernel;
-
- if(task.shader_eval_type >= SHADER_EVAL_BAKE)
- kernel = ckBakeKernel;
- else
- kernel = ckShaderKernel;
-
- cl_uint start_arg_index =
- kernel_set_args(kernel,
- 0,
- d_data,
- d_input,
- d_output);
-
- if(task.shader_eval_type < SHADER_EVAL_BAKE) {
- start_arg_index += kernel_set_args(kernel,
- start_arg_index,
- d_output_luma);
- }
-
-#define KERNEL_TEX(type, ttype, name) \
- set_kernel_arg_mem(kernel, &start_arg_index, #name);
-#include "kernel_textures.h"
-#undef KERNEL_TEX
-
- start_arg_index += kernel_set_args(kernel,
- start_arg_index,
- d_shader_eval_type);
- if(task.shader_eval_type >= SHADER_EVAL_BAKE) {
- start_arg_index += kernel_set_args(kernel,
- start_arg_index,
- d_shader_filter);
- }
- start_arg_index += kernel_set_args(kernel,
- start_arg_index,
- d_shader_x,
- d_shader_w,
- d_offset);
-
- for(int sample = 0; sample < task.num_samples; sample++) {
-
- if(task.get_cancel())
- break;
-
- kernel_set_args(kernel, start_arg_index, sample);
-
- enqueue_kernel(kernel, task.shader_w, 1);
-
- clFinish(cqCommandQueue);
-
- task.update_progress(NULL);
- }
- }
-
- class OpenCLDeviceTask : public DeviceTask {
- public:
- OpenCLDeviceTask(OpenCLDeviceBase *device, DeviceTask& task)
- : DeviceTask(task)
- {
- run = function_bind(&OpenCLDeviceBase::thread_run,
- device,
- this);
- }
- };
-
- int get_split_task_count(DeviceTask& /*task*/)
- {
- return 1;
- }
-
- void task_add(DeviceTask& task)
- {
- task_pool.push(new OpenCLDeviceTask(this, task));
- }
-
- void task_wait()
- {
- task_pool.wait();
- }
-
- void task_cancel()
- {
- task_pool.cancel();
- }
-
- virtual void thread_run(DeviceTask * /*task*/) = 0;
-
-protected:
- string kernel_build_options(const string *debug_src = NULL)
- {
- string build_options = "-cl-fast-relaxed-math ";
-
- if(platform_name == "NVIDIA CUDA") {
- build_options += "-D__KERNEL_OPENCL_NVIDIA__ "
- "-cl-nv-maxrregcount=32 "
- "-cl-nv-verbose ";
-
- uint compute_capability_major, compute_capability_minor;
- clGetDeviceInfo(cdDevice, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV,
- sizeof(cl_uint), &compute_capability_major, NULL);
- clGetDeviceInfo(cdDevice, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV,
- sizeof(cl_uint), &compute_capability_minor, NULL);
-
- build_options += string_printf("-D__COMPUTE_CAPABILITY__=%u ",
- compute_capability_major * 100 +
- compute_capability_minor * 10);
- }
-
- else if(platform_name == "Apple")
- build_options += "-D__KERNEL_OPENCL_APPLE__ ";
-
- else if(platform_name == "AMD Accelerated Parallel Processing")
- build_options += "-D__KERNEL_OPENCL_AMD__ ";
-
- else if(platform_name == "Intel(R) OpenCL") {
- build_options += "-D__KERNEL_OPENCL_INTEL_CPU__ ";
-
- /* Options for gdb source level kernel debugging.
- * this segfaults on linux currently.
- */
- if(opencl_kernel_use_debug() && debug_src)
- build_options += "-g -s \"" + *debug_src + "\" ";
- }
-
- if(opencl_kernel_use_debug())
- build_options += "-D__KERNEL_OPENCL_DEBUG__ ";
-
-#ifdef WITH_CYCLES_DEBUG
- build_options += "-D__KERNEL_DEBUG__ ";
-#endif
-
- return build_options;
- }
-
- class ArgumentWrapper {
- public:
- ArgumentWrapper() : size(0), pointer(NULL) {}
- template <typename T>
- ArgumentWrapper(T& argument) : size(sizeof(argument)),
- pointer(&argument) { }
- ArgumentWrapper(int argument) : size(sizeof(int)),
- int_value(argument),
- pointer(&int_value) { }
- ArgumentWrapper(float argument) : size(sizeof(float)),
- float_value(argument),
- pointer(&float_value) { }
- size_t size;
- int int_value;
- float float_value;
- void *pointer;
- };
-
- /* TODO(sergey): In the future we can use variadic templates, once
- * C++0x is allowed. Should allow to clean this up a bit.
- */
- int kernel_set_args(cl_kernel kernel,
- int start_argument_index,
- const ArgumentWrapper& arg1 = ArgumentWrapper(),
- const ArgumentWrapper& arg2 = ArgumentWrapper(),
- const ArgumentWrapper& arg3 = ArgumentWrapper(),
- const ArgumentWrapper& arg4 = ArgumentWrapper(),
- const ArgumentWrapper& arg5 = ArgumentWrapper(),
- const ArgumentWrapper& arg6 = ArgumentWrapper(),
- const ArgumentWrapper& arg7 = ArgumentWrapper(),
- const ArgumentWrapper& arg8 = ArgumentWrapper(),
- const ArgumentWrapper& arg9 = ArgumentWrapper(),
- const ArgumentWrapper& arg10 = ArgumentWrapper(),
- const ArgumentWrapper& arg11 = ArgumentWrapper(),
- const ArgumentWrapper& arg12 = ArgumentWrapper(),
- const ArgumentWrapper& arg13 = ArgumentWrapper(),
- const ArgumentWrapper& arg14 = ArgumentWrapper(),
- const ArgumentWrapper& arg15 = ArgumentWrapper(),
- const ArgumentWrapper& arg16 = ArgumentWrapper(),
- const ArgumentWrapper& arg17 = ArgumentWrapper(),
- const ArgumentWrapper& arg18 = ArgumentWrapper(),
- const ArgumentWrapper& arg19 = ArgumentWrapper(),
- const ArgumentWrapper& arg20 = ArgumentWrapper(),
- const ArgumentWrapper& arg21 = ArgumentWrapper(),
- const ArgumentWrapper& arg22 = ArgumentWrapper(),
- const ArgumentWrapper& arg23 = ArgumentWrapper(),
- const ArgumentWrapper& arg24 = ArgumentWrapper(),
- const ArgumentWrapper& arg25 = ArgumentWrapper(),
- const ArgumentWrapper& arg26 = ArgumentWrapper(),
- const ArgumentWrapper& arg27 = ArgumentWrapper(),
- const ArgumentWrapper& arg28 = ArgumentWrapper(),
- const ArgumentWrapper& arg29 = ArgumentWrapper(),
- const ArgumentWrapper& arg30 = ArgumentWrapper(),
- const ArgumentWrapper& arg31 = ArgumentWrapper(),
- const ArgumentWrapper& arg32 = ArgumentWrapper(),
- const ArgumentWrapper& arg33 = ArgumentWrapper())
- {
- int current_arg_index = 0;
-#define FAKE_VARARG_HANDLE_ARG(arg) \
- do { \
- if(arg.pointer != NULL) { \
- opencl_assert(clSetKernelArg( \
- kernel, \
- start_argument_index + current_arg_index, \
- arg.size, arg.pointer)); \
- ++current_arg_index; \
- } \
- else { \
- return current_arg_index; \
- } \
- } while(false)
- FAKE_VARARG_HANDLE_ARG(arg1);
- FAKE_VARARG_HANDLE_ARG(arg2);
- FAKE_VARARG_HANDLE_ARG(arg3);
- FAKE_VARARG_HANDLE_ARG(arg4);
- FAKE_VARARG_HANDLE_ARG(arg5);
- FAKE_VARARG_HANDLE_ARG(arg6);
- FAKE_VARARG_HANDLE_ARG(arg7);
- FAKE_VARARG_HANDLE_ARG(arg8);
- FAKE_VARARG_HANDLE_ARG(arg9);
- FAKE_VARARG_HANDLE_ARG(arg10);
- FAKE_VARARG_HANDLE_ARG(arg11);
- FAKE_VARARG_HANDLE_ARG(arg12);
- FAKE_VARARG_HANDLE_ARG(arg13);
- FAKE_VARARG_HANDLE_ARG(arg14);
- FAKE_VARARG_HANDLE_ARG(arg15);
- FAKE_VARARG_HANDLE_ARG(arg16);
- FAKE_VARARG_HANDLE_ARG(arg17);
- FAKE_VARARG_HANDLE_ARG(arg18);
- FAKE_VARARG_HANDLE_ARG(arg19);
- FAKE_VARARG_HANDLE_ARG(arg20);
- FAKE_VARARG_HANDLE_ARG(arg21);
- FAKE_VARARG_HANDLE_ARG(arg22);
- FAKE_VARARG_HANDLE_ARG(arg23);
- FAKE_VARARG_HANDLE_ARG(arg24);
- FAKE_VARARG_HANDLE_ARG(arg25);
- FAKE_VARARG_HANDLE_ARG(arg26);
- FAKE_VARARG_HANDLE_ARG(arg27);
- FAKE_VARARG_HANDLE_ARG(arg28);
- FAKE_VARARG_HANDLE_ARG(arg29);
- FAKE_VARARG_HANDLE_ARG(arg30);
- FAKE_VARARG_HANDLE_ARG(arg31);
- FAKE_VARARG_HANDLE_ARG(arg32);
- FAKE_VARARG_HANDLE_ARG(arg33);
-#undef FAKE_VARARG_HANDLE_ARG
- return current_arg_index;
- }
-
- inline void release_kernel_safe(cl_kernel kernel)
- {
- if(kernel) {
- clReleaseKernel(kernel);
- }
- }
-
- inline void release_mem_object_safe(cl_mem mem)
- {
- if(mem != NULL) {
- clReleaseMemObject(mem);
- }
- }
-
- inline void release_program_safe(cl_program program)
- {
- if(program) {
- clReleaseProgram(program);
- }
- }
-
- /* ** Those guys are for workign around some compiler-specific bugs ** */
-
- virtual cl_program load_cached_kernel(
- const DeviceRequestedFeatures& /*requested_features*/,
- OpenCLCache::ProgramName program_name,
- thread_scoped_lock& cache_locker)
- {
- return OpenCLCache::get_program(cpPlatform,
- cdDevice,
- program_name,
- cache_locker);
- }
-
- virtual void store_cached_kernel(cl_platform_id platform,
- cl_device_id device,
- cl_program program,
- OpenCLCache::ProgramName program_name,
- thread_scoped_lock& cache_locker)
- {
- OpenCLCache::store_program(platform,
- device,
- program,
- program_name,
- cache_locker);
- }
-
- virtual string build_options_for_base_program(
- const DeviceRequestedFeatures& /*requested_features*/)
- {
- /* TODO(sergey): By default we compile all features, meaning
- * the mega kernel is not getting feature-based optimizations.
- *
- * Ideally we should always compile the kernel with as few features
- * enabled as possible to keep performance at its maximum.
- */
- return "";
- }
-};
-
-class OpenCLDeviceMegaKernel : public OpenCLDeviceBase
-{
-public:
- cl_kernel ckPathTraceKernel;
- cl_program path_trace_program;
-
- OpenCLDeviceMegaKernel(DeviceInfo& info, Stats &stats, bool background_)
- : OpenCLDeviceBase(info, stats, background_)
- {
- ckPathTraceKernel = NULL;
- path_trace_program = NULL;
- }
-
- bool load_kernels(const DeviceRequestedFeatures& requested_features)
- {
- /* Get Shader, bake and film convert kernels.
- * This also verifies that OpenCL was actually initialized.
- */
- if(!OpenCLDeviceBase::load_kernels(requested_features)) {
- return false;
- }
-
- /* Try to use cached kernel. */
- thread_scoped_lock cache_locker;
- path_trace_program = OpenCLCache::get_program(cpPlatform,
- cdDevice,
- OpenCLCache::OCL_DEV_MEGAKERNEL_PROGRAM,
- cache_locker);
-
- if(!path_trace_program) {
- /* Verify we have the right OpenCL version. */
- if(!opencl_version_check())
- return false;
-
- /* Calculate md5 hash to detect changes. */
- string kernel_path = path_get("kernel");
- string kernel_md5 = path_files_md5_hash(kernel_path);
- string custom_kernel_build_options = "-D__COMPILE_ONLY_MEGAKERNEL__ ";
- string device_md5 = device_md5_hash(custom_kernel_build_options);
-
- /* Path to cached binary. */
- string clbin = string_printf("cycles_kernel_%s_%s.clbin",
- device_md5.c_str(),
- kernel_md5.c_str());
- clbin = path_user_get(path_join("cache", clbin));
-
- /* Path to preprocessed source for debugging. */
- string clsrc, *debug_src = NULL;
- if(opencl_kernel_use_debug()) {
- clsrc = string_printf("cycles_kernel_%s_%s.cl",
- device_md5.c_str(),
- kernel_md5.c_str());
- clsrc = path_user_get(path_join("cache", clsrc));
- debug_src = &clsrc;
- }
-
- /* If it exists already, try to use it. */
- if(path_exists(clbin) && load_binary(kernel_path,
- clbin,
- custom_kernel_build_options,
- &path_trace_program,
- debug_src))
- {
- /* Kernel loaded from binary, nothing to do. */
- }
- else {
- string init_kernel_source = "#include \"kernels/opencl/kernel.cl\" // " +
- kernel_md5 + "\n";
- /* If it does not exist or loading the binary failed, compile the kernel. */
- if(!compile_kernel("mega_kernel",
- kernel_path,
- init_kernel_source,
- custom_kernel_build_options,
- &path_trace_program,
- debug_src))
- {
- return false;
- }
- /* Save binary for reuse. */
- if(!save_binary(&path_trace_program, clbin)) {
- return false;
- }
- }
- /* Cache the program. */
- OpenCLCache::store_program(cpPlatform,
- cdDevice,
- path_trace_program,
- OpenCLCache::OCL_DEV_MEGAKERNEL_PROGRAM,
- cache_locker);
- }
-
- /* Find kernels. */
- ckPathTraceKernel = clCreateKernel(path_trace_program,
- "kernel_ocl_path_trace",
- &ciErr);
- if(opencl_error(ciErr))
- return false;
- return true;
- }
-
- ~OpenCLDeviceMegaKernel()
- {
- task_pool.stop();
- release_kernel_safe(ckPathTraceKernel);
- release_program_safe(path_trace_program);
- }
-
- void path_trace(RenderTile& rtile, int sample)
- {
- /* Cast arguments to cl types. */
- cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
- cl_mem d_buffer = CL_MEM_PTR(rtile.buffer);
- cl_mem d_rng_state = CL_MEM_PTR(rtile.rng_state);
- cl_int d_x = rtile.x;
- cl_int d_y = rtile.y;
- cl_int d_w = rtile.w;
- cl_int d_h = rtile.h;
- cl_int d_offset = rtile.offset;
- cl_int d_stride = rtile.stride;
-
- /* Sample arguments. */
- cl_int d_sample = sample;
-
- cl_uint start_arg_index =
- kernel_set_args(ckPathTraceKernel,
- 0,
- d_data,
- d_buffer,
- d_rng_state);
-
-#define KERNEL_TEX(type, ttype, name) \
- set_kernel_arg_mem(ckPathTraceKernel, &start_arg_index, #name);
-#include "kernel_textures.h"
-#undef KERNEL_TEX
-
- start_arg_index += kernel_set_args(ckPathTraceKernel,
- start_arg_index,
- d_sample,
- d_x,
- d_y,
- d_w,
- d_h,
- d_offset,
- d_stride);
-
- enqueue_kernel(ckPathTraceKernel, d_w, d_h);
- }
-
- void thread_run(DeviceTask *task)
- {
- if(task->type == DeviceTask::FILM_CONVERT) {
- film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half);
- }
- else if(task->type == DeviceTask::SHADER) {
- shader(*task);
- }
- else if(task->type == DeviceTask::PATH_TRACE) {
- RenderTile tile;
- /* Keep rendering tiles until done. */
- while(task->acquire_tile(this, tile)) {
- int start_sample = tile.start_sample;
- int end_sample = tile.start_sample + tile.num_samples;
-
- for(int sample = start_sample; sample < end_sample; sample++) {
- if(task->get_cancel()) {
- if(task->need_finish_queue == false)
- break;
- }
-
- path_trace(tile, sample);
-
- tile.sample = sample + 1;
-
- task->update_progress(&tile);
- }
-
- /* Complete kernel execution before releasing the tile. */
- /* This helps in multi-device render;
- * The device that reaches the critical-section function
- * release_tile waits (stalling other devices from entering
- * release_tile) for all kernels to complete. If device1 (a
- * slow-render device) reaches release_tile first then it would
- * stall device2 (a fast-render device) from proceeding to render
- * next tile.
- */
- clFinish(cqCommandQueue);
-
- task->release_tile(tile);
- }
- }
- }
-};
-
-/* TODO(sergey): This is to keep tile split on OpenCL level working
- * for now, since without this view-port render does not work as it
- * should.
- *
- * Ideally it'll be done on the higher level, but we need to get ready
- * for merge rather soon, so let's keep split logic private here in
- * the file.
- */
-class SplitRenderTile : public RenderTile {
-public:
- SplitRenderTile()
- : RenderTile(),
- buffer_offset_x(0),
- buffer_offset_y(0),
- rng_state_offset_x(0),
- rng_state_offset_y(0),
- buffer_rng_state_stride(0) {}
-
- explicit SplitRenderTile(RenderTile& tile)
- : RenderTile(),
- buffer_offset_x(0),
- buffer_offset_y(0),
- rng_state_offset_x(0),
- rng_state_offset_y(0),
- buffer_rng_state_stride(0)
- {
- x = tile.x;
- y = tile.y;
- w = tile.w;
- h = tile.h;
- start_sample = tile.start_sample;
- num_samples = tile.num_samples;
- sample = tile.sample;
- resolution = tile.resolution;
- offset = tile.offset;
- stride = tile.stride;
- buffer = tile.buffer;
- rng_state = tile.rng_state;
- buffers = tile.buffers;
- }
-
- /* The split kernel is constrained by device global memory;
- * hence it cannot render big tile sizes in one go. If the user
- * sets a big tile size (big is relative to the available device
- * global memory), we split the tile further and then call
- * path_trace on each of those split tiles. The following
- * variables assist in achieving that purpose.
- */
- int buffer_offset_x;
- int buffer_offset_y;
- int rng_state_offset_x;
- int rng_state_offset_y;
- int buffer_rng_state_stride;
-};
-
-/* OpenCLDeviceSplitKernel's declaration/definition. */
-class OpenCLDeviceSplitKernel : public OpenCLDeviceBase
-{
-public:
- /* Kernel declaration. */
- cl_kernel ckPathTraceKernel_data_init;
- cl_kernel ckPathTraceKernel_scene_intersect;
- cl_kernel ckPathTraceKernel_lamp_emission;
- cl_kernel ckPathTraceKernel_queue_enqueue;
- cl_kernel ckPathTraceKernel_background_buffer_update;
- cl_kernel ckPathTraceKernel_shader_eval;
- cl_kernel ckPathTraceKernel_holdout_emission_blurring_pathtermination_ao;
- cl_kernel ckPathTraceKernel_direct_lighting;
- cl_kernel ckPathTraceKernel_shadow_blocked;
- cl_kernel ckPathTraceKernel_next_iteration_setup;
- cl_kernel ckPathTraceKernel_sum_all_radiance;
-
- /* cl_program declaration. */
- cl_program data_init_program;
- cl_program scene_intersect_program;
- cl_program lamp_emission_program;
- cl_program queue_enqueue_program;
- cl_program background_buffer_update_program;
- cl_program shader_eval_program;
- cl_program holdout_emission_blurring_pathtermination_ao_program;
- cl_program direct_lighting_program;
- cl_program shadow_blocked_program;
- cl_program next_iteration_setup_program;
- cl_program sum_all_radiance_program;
-
- /* Global memory variables [porting]; this memory is used for
- * co-operation between different kernels: data written by one
- * kernel is available to another kernel via this global
- * memory.
- */
- cl_mem rng_coop;
- cl_mem throughput_coop;
- cl_mem L_transparent_coop;
- cl_mem PathRadiance_coop;
- cl_mem Ray_coop;
- cl_mem PathState_coop;
- cl_mem Intersection_coop;
- cl_mem kgbuffer; /* KernelGlobals buffer. */
-
- /* Global buffers for ShaderData. */
- cl_mem sd; /* ShaderData used in the main path-iteration loop. */
- cl_mem sd_DL_shadow; /* ShaderData used in Direct Lighting and
- * shadow_blocked kernel.
- */
-
- /* Global memory required for shadow blocked and accum_radiance. */
- cl_mem BSDFEval_coop;
- cl_mem ISLamp_coop;
- cl_mem LightRay_coop;
- cl_mem AOAlpha_coop;
- cl_mem AOBSDF_coop;
- cl_mem AOLightRay_coop;
- cl_mem Intersection_coop_shadow;
-
-#ifdef WITH_CYCLES_DEBUG
- /* DebugData memory */
- cl_mem debugdata_coop;
-#endif
-
- /* Global state array that tracks ray state. */
- cl_mem ray_state;
-
- /* Per sample buffers. */
- cl_mem per_sample_output_buffers;
-
- /* Denotes which sample each ray is being processed for. */
- cl_mem work_array;
-
- /* Queue */
- cl_mem Queue_data; /* Array of size queuesize * num_queues * sizeof(int). */
- cl_mem Queue_index; /* Array of size num_queues * sizeof(int);
- * Tracks the size of each queue.
- */
-
- /* Flag to make the scene_intersect and lamp_emission kernels use queues. */
- cl_mem use_queues_flag;
-
- /* Amount of memory in output buffer associated with one pixel/thread. */
- size_t per_thread_output_buffer_size;
-
- /* Total allocatable available device memory. */
- size_t total_allocatable_memory;
-
- /* Host-side copy of ray_state; used to check path-iteration
- * termination on the host.
- */
- char *hostRayStateArray;
-
- /* Number of path-iterations to be done in one shot. */
- unsigned int PathIteration_times;
-
-#ifdef __WORK_STEALING__
- /* Work pool with respect to each work group. */
- cl_mem work_pool_wgs;
-
- /* Denotes the maximum work groups possible w.r.t. current tile size. */
- unsigned int max_work_groups;
-#endif
-
- /* clos_max value for which the kernels have been loaded currently. */
- int current_max_closure;
-
- /* Marked True in constructor and marked false at the end of path_trace(). */
- bool first_tile;
-
- OpenCLDeviceSplitKernel(DeviceInfo& info, Stats &stats, bool background_)
- : OpenCLDeviceBase(info, stats, background_)
- {
- background = background_;
-
- /* Initialize kernels. */
- ckPathTraceKernel_data_init = NULL;
- ckPathTraceKernel_scene_intersect = NULL;
- ckPathTraceKernel_lamp_emission = NULL;
- ckPathTraceKernel_background_buffer_update = NULL;
- ckPathTraceKernel_shader_eval = NULL;
- ckPathTraceKernel_holdout_emission_blurring_pathtermination_ao = NULL;
- ckPathTraceKernel_direct_lighting = NULL;
- ckPathTraceKernel_shadow_blocked = NULL;
- ckPathTraceKernel_next_iteration_setup = NULL;
- ckPathTraceKernel_sum_all_radiance = NULL;
- ckPathTraceKernel_queue_enqueue = NULL;
-
- /* Initialize program. */
- data_init_program = NULL;
- scene_intersect_program = NULL;
- lamp_emission_program = NULL;
- queue_enqueue_program = NULL;
- background_buffer_update_program = NULL;
- shader_eval_program = NULL;
- holdout_emission_blurring_pathtermination_ao_program = NULL;
- direct_lighting_program = NULL;
- shadow_blocked_program = NULL;
- next_iteration_setup_program = NULL;
- sum_all_radiance_program = NULL;
-
- /* Initialize cl_mem variables. */
- kgbuffer = NULL;
- sd = NULL;
- sd_DL_shadow = NULL;
-
- rng_coop = NULL;
- throughput_coop = NULL;
- L_transparent_coop = NULL;
- PathRadiance_coop = NULL;
- Ray_coop = NULL;
- PathState_coop = NULL;
- Intersection_coop = NULL;
- ray_state = NULL;
-
- AOAlpha_coop = NULL;
- AOBSDF_coop = NULL;
- AOLightRay_coop = NULL;
- BSDFEval_coop = NULL;
- ISLamp_coop = NULL;
- LightRay_coop = NULL;
- Intersection_coop_shadow = NULL;
-
-#ifdef WITH_CYCLES_DEBUG
- debugdata_coop = NULL;
-#endif
-
- work_array = NULL;
-
- /* Queue. */
- Queue_data = NULL;
- Queue_index = NULL;
- use_queues_flag = NULL;
-
- per_sample_output_buffers = NULL;
-
- per_thread_output_buffer_size = 0;
- hostRayStateArray = NULL;
- PathIteration_times = PATH_ITER_INC_FACTOR;
-#ifdef __WORK_STEALING__
- work_pool_wgs = NULL;
- max_work_groups = 0;
-#endif
- current_max_closure = -1;
- first_tile = true;
-
- /* Get device's maximum memory that can be allocated. */
- ciErr = clGetDeviceInfo(cdDevice,
- CL_DEVICE_MAX_MEM_ALLOC_SIZE,
- sizeof(size_t),
- &total_allocatable_memory,
- NULL);
- assert(ciErr == CL_SUCCESS);
- if(platform_name == "AMD Accelerated Parallel Processing") {
- /* This value is tweakable; the AMD platform does not seem to
- * give maximum performance when all of CL_DEVICE_MAX_MEM_ALLOC_SIZE
- * is considered for further computation.
- */
- total_allocatable_memory /= 2;
- }
- }
-
- /* TODO(sergey): Seems really close to load_kernel(),
- * could it be de-duplicated?
- */
- bool load_split_kernel(const string& kernel_name,
- const string& kernel_path,
- const string& kernel_init_source,
- const string& clbin,
- const string& custom_kernel_build_options,
- cl_program *program,
- const string *debug_src = NULL)
- {
- if(!opencl_version_check()) {
- return false;
- }
-
- string cache_clbin = path_user_get(path_join("cache", clbin));
-
- /* If it exists already, try to use it. */
- if(path_exists(cache_clbin) && load_binary(kernel_path,
- cache_clbin,
- custom_kernel_build_options,
- program,
- debug_src))
- {
- /* Kernel loaded from binary. */
- }
- else {
- /* If it does not exist or loading the binary failed, compile the kernel. */
- if(!compile_kernel(kernel_name,
- kernel_path,
- kernel_init_source,
- custom_kernel_build_options,
- program,
- debug_src))
- {
- return false;
- }
- /* Save binary for reuse. */
- if(!save_binary(program, cache_clbin)) {
- return false;
- }
- }
- return true;
- }
-
- /* Split kernel utility functions. */
- size_t get_tex_size(const char *tex_name)
- {
- cl_mem ptr;
- size_t ret_size = 0;
- MemMap::iterator i = mem_map.find(tex_name);
- if(i != mem_map.end()) {
- ptr = CL_MEM_PTR(i->second);
- ciErr = clGetMemObjectInfo(ptr,
- CL_MEM_SIZE,
- sizeof(ret_size),
- &ret_size,
- NULL);
- assert(ciErr == CL_SUCCESS);
- }
- return ret_size;
- }
-
- size_t get_shader_data_size(size_t max_closure)
- {
- /* ShaderData size with variable size ShaderClosure array */
- return sizeof(ShaderData) - (sizeof(ShaderClosure) * (MAX_CLOSURE - max_closure));
- }
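
get_shader_data_size() trims the unused tail of the fixed-size ShaderClosure array inside
ShaderData. The same arithmetic with stand-in types and invented sizes (the real layouts
live in the kernel headers, so this is only an illustrative sketch):

    #include <cstdio>
    #include <cstddef>

    /* Stand-ins for the real kernel types; sizes are invented for the example. */
    struct FakeClosure { float data[16]; };              /* 64 bytes */
    enum { FAKE_MAX_CLOSURE = 64 };
    struct FakeShaderData {
    	float P[4], N[4];
    	FakeClosure closure[FAKE_MAX_CLOSURE];            /* fixed-size array in the struct */
    };

    /* Same formula as get_shader_data_size(): drop the closures we will not use. */
    static size_t shader_data_size(size_t max_closure)
    {
    	return sizeof(FakeShaderData) - sizeof(FakeClosure) * (FAKE_MAX_CLOSURE - max_closure);
    }

    int main()
    {
    	/* A kernel limited to 4 closures needs far less per-thread storage. */
    	printf("full: %zu bytes, max_closure=4: %zu bytes\n",
    	       shader_data_size(FAKE_MAX_CLOSURE), shader_data_size(4));
    	return 0;
    }
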
-
- /* Returns size of KernelGlobals structure associated with OpenCL. */
- size_t get_KernelGlobals_size()
- {
- /* Copy dummy KernelGlobals related to OpenCL from kernel_globals.h to
- * fetch its size.
- */
- typedef struct KernelGlobals {
- ccl_constant KernelData *data;
-#define KERNEL_TEX(type, ttype, name) \
- ccl_global type *name;
-#include "kernel_textures.h"
-#undef KERNEL_TEX
- void *sd_input;
- void *isect_shadow;
- } KernelGlobals;
-
- return sizeof(KernelGlobals);
- }
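
The sizeof-a-host-side-mirror trick used above can be shown in isolation. The struct below
is only a stand-in: in the real code the member list is generated from kernel_textures.h
via the KERNEL_TEX macro.

    #include <cstdio>

    /* A host-side mirror declared only to measure the device-side struct. */
    struct KernelGlobalsMirror {
    	const void *data;
    	void *tex_a;        /* one pointer per KERNEL_TEX entry (illustrative) */
    	void *tex_b;
    	void *sd_input;
    	void *isect_shadow;
    };

    int main()
    {
    	printf("KernelGlobals buffer size: %zu bytes\n", sizeof(KernelGlobalsMirror));
    	return 0;
    }
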
-
- bool load_kernels(const DeviceRequestedFeatures& requested_features)
- {
- /* Get Shader, bake and film_convert kernels.
- * This also verifies that OpenCL was actually initialized.
- */
- if(!OpenCLDeviceBase::load_kernels(requested_features)) {
- return false;
- }
-
- string kernel_path = path_get("kernel");
- string kernel_md5 = path_files_md5_hash(kernel_path);
- string device_md5;
- string kernel_init_source;
- string clbin;
- string clsrc, *debug_src = NULL;
-
- string build_options = "-D__SPLIT_KERNEL__ ";
-#ifdef __WORK_STEALING__
- build_options += "-D__WORK_STEALING__ ";
-#endif
- build_options += requested_features.get_build_options();
-
- /* Set compute device build option. */
- cl_device_type device_type;
- ciErr = clGetDeviceInfo(cdDevice,
- CL_DEVICE_TYPE,
- sizeof(cl_device_type),
- &device_type,
- NULL);
- assert(ciErr == CL_SUCCESS);
- if(device_type == CL_DEVICE_TYPE_GPU) {
- build_options += " -D__COMPUTE_DEVICE_GPU__";
- }
-
-#define GLUE(a, b) a ## b
-#define LOAD_KERNEL(name) \
- do { \
- kernel_init_source = "#include \"kernels/opencl/kernel_" #name ".cl\" // " + \
- kernel_md5 + "\n"; \
- device_md5 = device_md5_hash(build_options); \
- clbin = string_printf("cycles_kernel_%s_%s_" #name ".clbin", \
- device_md5.c_str(), kernel_md5.c_str()); \
- if(opencl_kernel_use_debug()) { \
- clsrc = string_printf("cycles_kernel_%s_%s_" #name ".cl", \
- device_md5.c_str(), kernel_md5.c_str()); \
- clsrc = path_user_get(path_join("cache", clsrc)); \
- debug_src = &clsrc; \
- } \
- if(!load_split_kernel(#name, \
- kernel_path, \
- kernel_init_source, \
- clbin, \
- build_options, \
- &GLUE(name, _program), \
- debug_src)) \
- { \
- fprintf(stderr, "Failed to compile %s\n", #name); \
- return false; \
- } \
- } while(false)
-
- LOAD_KERNEL(data_init);
- LOAD_KERNEL(scene_intersect);
- LOAD_KERNEL(lamp_emission);
- LOAD_KERNEL(queue_enqueue);
- LOAD_KERNEL(background_buffer_update);
- LOAD_KERNEL(shader_eval);
- LOAD_KERNEL(holdout_emission_blurring_pathtermination_ao);
- LOAD_KERNEL(direct_lighting);
- LOAD_KERNEL(shadow_blocked);
- LOAD_KERNEL(next_iteration_setup);
- LOAD_KERNEL(sum_all_radiance);
-
-#undef LOAD_KERNEL
-
-#define FIND_KERNEL(name) \
- do { \
- GLUE(ckPathTraceKernel_, name) = \
- clCreateKernel(GLUE(name, _program), \
- "kernel_ocl_path_trace_" #name, &ciErr); \
- if(opencl_error(ciErr)) { \
- fprintf(stderr,"Missing kernel kernel_ocl_path_trace_%s\n", #name); \
- return false; \
- } \
- } while(false)
-
- FIND_KERNEL(data_init);
- FIND_KERNEL(scene_intersect);
- FIND_KERNEL(lamp_emission);
- FIND_KERNEL(queue_enqueue);
- FIND_KERNEL(background_buffer_update);
- FIND_KERNEL(shader_eval);
- FIND_KERNEL(holdout_emission_blurring_pathtermination_ao);
- FIND_KERNEL(direct_lighting);
- FIND_KERNEL(shadow_blocked);
- FIND_KERNEL(next_iteration_setup);
- FIND_KERNEL(sum_all_radiance);
-#undef FIND_KERNEL
-#undef GLUE
-
- current_max_closure = requested_features.max_closure;
-
- return true;
- }
-
- ~OpenCLDeviceSplitKernel()
- {
- task_pool.stop();
-
- /* Release kernels */
- release_kernel_safe(ckPathTraceKernel_data_init);
- release_kernel_safe(ckPathTraceKernel_scene_intersect);
- release_kernel_safe(ckPathTraceKernel_lamp_emission);
- release_kernel_safe(ckPathTraceKernel_queue_enqueue);
- release_kernel_safe(ckPathTraceKernel_background_buffer_update);
- release_kernel_safe(ckPathTraceKernel_shader_eval);
- release_kernel_safe(ckPathTraceKernel_holdout_emission_blurring_pathtermination_ao);
- release_kernel_safe(ckPathTraceKernel_direct_lighting);
- release_kernel_safe(ckPathTraceKernel_shadow_blocked);
- release_kernel_safe(ckPathTraceKernel_next_iteration_setup);
- release_kernel_safe(ckPathTraceKernel_sum_all_radiance);
-
- /* Release global memory */
- release_mem_object_safe(rng_coop);
- release_mem_object_safe(throughput_coop);
- release_mem_object_safe(L_transparent_coop);
- release_mem_object_safe(PathRadiance_coop);
- release_mem_object_safe(Ray_coop);
- release_mem_object_safe(PathState_coop);
- release_mem_object_safe(Intersection_coop);
- release_mem_object_safe(kgbuffer);
- release_mem_object_safe(sd);
- release_mem_object_safe(sd_DL_shadow);
- release_mem_object_safe(ray_state);
- release_mem_object_safe(AOAlpha_coop);
- release_mem_object_safe(AOBSDF_coop);
- release_mem_object_safe(AOLightRay_coop);
- release_mem_object_safe(BSDFEval_coop);
- release_mem_object_safe(ISLamp_coop);
- release_mem_object_safe(LightRay_coop);
- release_mem_object_safe(Intersection_coop_shadow);
-#ifdef WITH_CYCLES_DEBUG
- release_mem_object_safe(debugdata_coop);
-#endif
- release_mem_object_safe(use_queues_flag);
- release_mem_object_safe(Queue_data);
- release_mem_object_safe(Queue_index);
- release_mem_object_safe(work_array);
-#ifdef __WORK_STEALING__
- release_mem_object_safe(work_pool_wgs);
-#endif
- release_mem_object_safe(per_sample_output_buffers);
-
- /* Release programs */
- release_program_safe(data_init_program);
- release_program_safe(scene_intersect_program);
- release_program_safe(lamp_emission_program);
- release_program_safe(queue_enqueue_program);
- release_program_safe(background_buffer_update_program);
- release_program_safe(shader_eval_program);
- release_program_safe(holdout_emission_blurring_pathtermination_ao_program);
- release_program_safe(direct_lighting_program);
- release_program_safe(shadow_blocked_program);
- release_program_safe(next_iteration_setup_program);
- release_program_safe(sum_all_radiance_program);
-
- if(hostRayStateArray != NULL) {
- free(hostRayStateArray);
- }
- }
-
- void path_trace(DeviceTask *task,
- SplitRenderTile& rtile,
- int2 max_render_feasible_tile_size)
- {
- /* cast arguments to cl types */
- cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
- cl_mem d_buffer = CL_MEM_PTR(rtile.buffer);
- cl_mem d_rng_state = CL_MEM_PTR(rtile.rng_state);
- cl_int d_x = rtile.x;
- cl_int d_y = rtile.y;
- cl_int d_w = rtile.w;
- cl_int d_h = rtile.h;
- cl_int d_offset = rtile.offset;
- cl_int d_stride = rtile.stride;
-
- /* Make sure that the render-feasible tile size is a multiple of the
- * local work size dimensions.
- */
- assert(max_render_feasible_tile_size.x % SPLIT_KERNEL_LOCAL_SIZE_X == 0);
- assert(max_render_feasible_tile_size.y % SPLIT_KERNEL_LOCAL_SIZE_Y == 0);
-
- size_t global_size[2];
- size_t local_size[2] = {SPLIT_KERNEL_LOCAL_SIZE_X,
- SPLIT_KERNEL_LOCAL_SIZE_Y};
-
- /* Set the range of samples to be processed for every ray in
- * path-regeneration logic.
- */
- cl_int start_sample = rtile.start_sample;
- cl_int end_sample = rtile.start_sample + rtile.num_samples;
- cl_int num_samples = rtile.num_samples;
-
-#ifdef __WORK_STEALING__
- global_size[0] = (((d_w - 1) / local_size[0]) + 1) * local_size[0];
- global_size[1] = (((d_h - 1) / local_size[1]) + 1) * local_size[1];
- unsigned int num_parallel_samples = 1;
-#else
- global_size[1] = (((d_h - 1) / local_size[1]) + 1) * local_size[1];
- unsigned int num_threads = max_render_feasible_tile_size.x *
- max_render_feasible_tile_size.y;
- unsigned int num_tile_columns_possible = num_threads / global_size[1];
- /* Estimate the number of samples that can be
- * processed in parallel.
- */
- unsigned int num_parallel_samples = min(num_tile_columns_possible / d_w,
- rtile.num_samples);
- /* Wavefront size in AMD is 64.
- * TODO(sergey): What about other platforms?
- */
- if(num_parallel_samples >= 64) {
- /* TODO(sergey): Could use generic round-up here. */
- num_parallel_samples = (num_parallel_samples / 64) * 64;
- }
- assert(num_parallel_samples != 0);
-
- global_size[0] = d_w * num_parallel_samples;
-#endif /* __WORK_STEALING__ */
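
In the non-work-stealing branch above, the global size packs several samples side by side
along X and rounds the count down to the (assumed) 64-wide wavefront. The core arithmetic,
pulled out into a self-contained sketch with made-up tile numbers:

    #include <algorithm>
    #include <cstdio>

    int main()
    {
    	const int d_w = 240, d_h = 135, num_samples = 100;
    	const int local_y = 1;                          /* SPLIT_KERNEL_LOCAL_SIZE_Y */
    	const int feasible_w = 512, feasible_h = 540;   /* max_render_feasible_tile_size */

    	int global_y = (((d_h - 1) / local_y) + 1) * local_y;
    	unsigned int num_threads = feasible_w * feasible_h;
    	unsigned int columns = num_threads / global_y;
    	unsigned int parallel_samples = std::min<unsigned int>(columns / d_w, num_samples);
    	if(parallel_samples >= 64)                      /* round down to wavefront size */
    		parallel_samples = (parallel_samples / 64) * 64;
    	printf("global size: %ux%d, samples in flight: %u\n",
    	       d_w * parallel_samples, global_y, parallel_samples);
    	return 0;
    }
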
-
- assert(global_size[0] * global_size[1] <=
- max_render_feasible_tile_size.x * max_render_feasible_tile_size.y);
-
- /* Allocate all required global memory once. */
- if(first_tile) {
- size_t num_global_elements = max_render_feasible_tile_size.x *
- max_render_feasible_tile_size.y;
- /* TODO(sergey): This will actually over-allocate if
- * a particular kernel does not support multiclosure.
- */
- size_t shaderdata_size = get_shader_data_size(current_max_closure);
-
-#ifdef __WORK_STEALING__
- /* Calculate max groups */
- size_t max_global_size[2];
- size_t tile_x = max_render_feasible_tile_size.x;
- size_t tile_y = max_render_feasible_tile_size.y;
- max_global_size[0] = (((tile_x - 1) / local_size[0]) + 1) * local_size[0];
- max_global_size[1] = (((tile_y - 1) / local_size[1]) + 1) * local_size[1];
- max_work_groups = (max_global_size[0] * max_global_size[1]) /
- (local_size[0] * local_size[1]);
- /* Allocate work_pool_wgs memory. */
- work_pool_wgs = mem_alloc(max_work_groups * sizeof(unsigned int));
-#endif /* __WORK_STEALING__ */
-
- /* Allocate queue_index memory only once. */
- Queue_index = mem_alloc(NUM_QUEUES * sizeof(int));
- use_queues_flag = mem_alloc(sizeof(char));
- kgbuffer = mem_alloc(get_KernelGlobals_size());
-
- /* Create global buffers for ShaderData. */
- sd = mem_alloc(num_global_elements * shaderdata_size);
- sd_DL_shadow = mem_alloc(num_global_elements * 2 * shaderdata_size);
-
- /* Creation of global memory buffers which are shared among
- * the kernels.
- */
- rng_coop = mem_alloc(num_global_elements * sizeof(RNG));
- throughput_coop = mem_alloc(num_global_elements * sizeof(float3));
- L_transparent_coop = mem_alloc(num_global_elements * sizeof(float));
- PathRadiance_coop = mem_alloc(num_global_elements * sizeof(PathRadiance));
- Ray_coop = mem_alloc(num_global_elements * sizeof(Ray));
- PathState_coop = mem_alloc(num_global_elements * sizeof(PathState));
- Intersection_coop = mem_alloc(num_global_elements * sizeof(Intersection));
- AOAlpha_coop = mem_alloc(num_global_elements * sizeof(float3));
- AOBSDF_coop = mem_alloc(num_global_elements * sizeof(float3));
- AOLightRay_coop = mem_alloc(num_global_elements * sizeof(Ray));
- BSDFEval_coop = mem_alloc(num_global_elements * sizeof(BsdfEval));
- ISLamp_coop = mem_alloc(num_global_elements * sizeof(int));
- LightRay_coop = mem_alloc(num_global_elements * sizeof(Ray));
- Intersection_coop_shadow = mem_alloc(2 * num_global_elements * sizeof(Intersection));
-
-#ifdef WITH_CYCLES_DEBUG
- debugdata_coop = mem_alloc(num_global_elements * sizeof(DebugData));
-#endif
-
- ray_state = mem_alloc(num_global_elements * sizeof(char));
-
- hostRayStateArray = (char *)calloc(num_global_elements, sizeof(char));
- assert(hostRayStateArray != NULL && "Can't create hostRayStateArray memory");
-
- Queue_data = mem_alloc(num_global_elements * (NUM_QUEUES * sizeof(int)+sizeof(int)));
- work_array = mem_alloc(num_global_elements * sizeof(unsigned int));
- per_sample_output_buffers = mem_alloc(num_global_elements *
- per_thread_output_buffer_size);
- }
-
- cl_int dQueue_size = global_size[0] * global_size[1];
-
- cl_uint start_arg_index =
- kernel_set_args(ckPathTraceKernel_data_init,
- 0,
- kgbuffer,
- sd_DL_shadow,
- d_data,
- per_sample_output_buffers,
- d_rng_state,
- rng_coop,
- throughput_coop,
- L_transparent_coop,
- PathRadiance_coop,
- Ray_coop,
- PathState_coop,
- Intersection_coop_shadow,
- ray_state);
-
-/* TODO(sergey): Avoid map lookup here. */
-#define KERNEL_TEX(type, ttype, name) \
- set_kernel_arg_mem(ckPathTraceKernel_data_init, &start_arg_index, #name);
-#include "kernel_textures.h"
-#undef KERNEL_TEX
-
- start_arg_index +=
- kernel_set_args(ckPathTraceKernel_data_init,
- start_arg_index,
- start_sample,
- d_x,
- d_y,
- d_w,
- d_h,
- d_offset,
- d_stride,
- rtile.rng_state_offset_x,
- rtile.rng_state_offset_y,
- rtile.buffer_rng_state_stride,
- Queue_data,
- Queue_index,
- dQueue_size,
- use_queues_flag,
- work_array,
-#ifdef __WORK_STEALING__
- work_pool_wgs,
- num_samples,
-#endif
-#ifdef WITH_CYCLES_DEBUG
- debugdata_coop,
-#endif
- num_parallel_samples);
-
- kernel_set_args(ckPathTraceKernel_scene_intersect,
- 0,
- kgbuffer,
- d_data,
- rng_coop,
- Ray_coop,
- PathState_coop,
- Intersection_coop,
- ray_state,
- d_w,
- d_h,
- Queue_data,
- Queue_index,
- dQueue_size,
- use_queues_flag,
-#ifdef WITH_CYCLES_DEBUG
- debugdata_coop,
-#endif
- num_parallel_samples);
-
- kernel_set_args(ckPathTraceKernel_lamp_emission,
- 0,
- kgbuffer,
- d_data,
- throughput_coop,
- PathRadiance_coop,
- Ray_coop,
- PathState_coop,
- Intersection_coop,
- ray_state,
- d_w,
- d_h,
- Queue_data,
- Queue_index,
- dQueue_size,
- use_queues_flag,
- num_parallel_samples);
-
- kernel_set_args(ckPathTraceKernel_queue_enqueue,
- 0,
- Queue_data,
- Queue_index,
- ray_state,
- dQueue_size);
-
- kernel_set_args(ckPathTraceKernel_background_buffer_update,
- 0,
- kgbuffer,
- d_data,
- per_sample_output_buffers,
- d_rng_state,
- rng_coop,
- throughput_coop,
- PathRadiance_coop,
- Ray_coop,
- PathState_coop,
- L_transparent_coop,
- ray_state,
- d_w,
- d_h,
- d_x,
- d_y,
- d_stride,
- rtile.rng_state_offset_x,
- rtile.rng_state_offset_y,
- rtile.buffer_rng_state_stride,
- work_array,
- Queue_data,
- Queue_index,
- dQueue_size,
- end_sample,
- start_sample,
-#ifdef __WORK_STEALING__
- work_pool_wgs,
- num_samples,
-#endif
-#ifdef WITH_CYCLES_DEBUG
- debugdata_coop,
-#endif
- num_parallel_samples);
-
- kernel_set_args(ckPathTraceKernel_shader_eval,
- 0,
- kgbuffer,
- d_data,
- sd,
- rng_coop,
- Ray_coop,
- PathState_coop,
- Intersection_coop,
- ray_state,
- Queue_data,
- Queue_index,
- dQueue_size);
-
- kernel_set_args(ckPathTraceKernel_holdout_emission_blurring_pathtermination_ao,
- 0,
- kgbuffer,
- d_data,
- sd,
- per_sample_output_buffers,
- rng_coop,
- throughput_coop,
- L_transparent_coop,
- PathRadiance_coop,
- PathState_coop,
- Intersection_coop,
- AOAlpha_coop,
- AOBSDF_coop,
- AOLightRay_coop,
- d_w,
- d_h,
- d_x,
- d_y,
- d_stride,
- ray_state,
- work_array,
- Queue_data,
- Queue_index,
- dQueue_size,
-#ifdef __WORK_STEALING__
- start_sample,
-#endif
- num_parallel_samples);
-
- kernel_set_args(ckPathTraceKernel_direct_lighting,
- 0,
- kgbuffer,
- d_data,
- sd,
- rng_coop,
- PathState_coop,
- ISLamp_coop,
- LightRay_coop,
- BSDFEval_coop,
- ray_state,
- Queue_data,
- Queue_index,
- dQueue_size);
-
- kernel_set_args(ckPathTraceKernel_shadow_blocked,
- 0,
- kgbuffer,
- d_data,
- PathState_coop,
- LightRay_coop,
- AOLightRay_coop,
- ray_state,
- Queue_data,
- Queue_index,
- dQueue_size);
-
- kernel_set_args(ckPathTraceKernel_next_iteration_setup,
- 0,
- kgbuffer,
- d_data,
- sd,
- rng_coop,
- throughput_coop,
- PathRadiance_coop,
- Ray_coop,
- PathState_coop,
- LightRay_coop,
- ISLamp_coop,
- BSDFEval_coop,
- AOLightRay_coop,
- AOBSDF_coop,
- AOAlpha_coop,
- ray_state,
- Queue_data,
- Queue_index,
- dQueue_size,
- use_queues_flag);
-
- kernel_set_args(ckPathTraceKernel_sum_all_radiance,
- 0,
- d_data,
- d_buffer,
- per_sample_output_buffers,
- num_parallel_samples,
- d_w,
- d_h,
- d_stride,
- rtile.buffer_offset_x,
- rtile.buffer_offset_y,
- rtile.buffer_rng_state_stride,
- start_sample);
-
- /* Macro for Enqueuing split kernels. */
-#define GLUE(a, b) a ## b
-#define ENQUEUE_SPLIT_KERNEL(kernelName, globalSize, localSize) \
- { \
- ciErr = clEnqueueNDRangeKernel(cqCommandQueue, \
- GLUE(ckPathTraceKernel_, \
- kernelName), \
- 2, \
- NULL, \
- globalSize, \
- localSize, \
- 0, \
- NULL, \
- NULL); \
- opencl_assert_err(ciErr, "clEnqueueNDRangeKernel"); \
- if(ciErr != CL_SUCCESS) { \
- string message = string_printf("OpenCL error: %s in clEnqueueNDRangeKernel()", \
- clewErrorString(ciErr)); \
- opencl_error(message); \
- return; \
- } \
- } (void) 0
-
- /* Enqueue ckPathTraceKernel_data_init kernel. */
- ENQUEUE_SPLIT_KERNEL(data_init, global_size, local_size);
- bool activeRaysAvailable = true;
-
- /* Record number of time host intervention has been made */
- /* Record the number of times host intervention has been made. */
- unsigned int numNextPathIterTimes = PathIteration_times;
- bool canceled = false;
- while(activeRaysAvailable) {
- /* Twice the global work size of other kernels for
- * ckPathTraceKernel_shadow_blocked_direct_lighting. */
- size_t global_size_shadow_blocked[2];
- global_size_shadow_blocked[0] = global_size[0] * 2;
- global_size_shadow_blocked[1] = global_size[1];
-
- /* Do path-iteration on the host (enqueue path-iteration kernels). */
- for(int PathIter = 0; PathIter < PathIteration_times; PathIter++) {
- ENQUEUE_SPLIT_KERNEL(scene_intersect, global_size, local_size);
- ENQUEUE_SPLIT_KERNEL(lamp_emission, global_size, local_size);
- ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
- ENQUEUE_SPLIT_KERNEL(background_buffer_update, global_size, local_size);
- ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size);
- ENQUEUE_SPLIT_KERNEL(holdout_emission_blurring_pathtermination_ao, global_size, local_size);
- ENQUEUE_SPLIT_KERNEL(direct_lighting, global_size, local_size);
- ENQUEUE_SPLIT_KERNEL(shadow_blocked, global_size_shadow_blocked, local_size);
- ENQUEUE_SPLIT_KERNEL(next_iteration_setup, global_size, local_size);
- if(task->get_cancel()) {
- canceled = true;
- break;
- }
- }
-
- /* Read ray-state into Host memory to decide if we should exit
- * path-iteration in host.
- */
- ciErr = clEnqueueReadBuffer(cqCommandQueue,
- ray_state,
- CL_TRUE,
- 0,
- global_size[0] * global_size[1] * sizeof(char),
- hostRayStateArray,
- 0,
- NULL,
- NULL);
- assert(ciErr == CL_SUCCESS);
-
- activeRaysAvailable = false;
-
- for(int rayStateIter = 0;
- rayStateIter < global_size[0] * global_size[1];
- ++rayStateIter)
- {
- if(int8_t(hostRayStateArray[rayStateIter]) != RAY_INACTIVE) {
- /* Not all rays are RAY_INACTIVE. */
- activeRaysAvailable = true;
- break;
- }
- }
-
- if(activeRaysAvailable) {
- numHostIntervention++;
- PathIteration_times = PATH_ITER_INC_FACTOR;
- /* Host intervention was needed before all rays became RAY_INACTIVE;
- * do more initial iterations for the next tile.
- */
- numNextPathIterTimes += PATH_ITER_INC_FACTOR;
- }
- if(task->get_cancel()) {
- canceled = true;
- break;
- }
- }
-
- /* Execute the sum_all_radiance kernel to accumulate the radiance calculated
- * in per_sample_output_buffers into the RenderTile's output buffer.
- */
- if(!canceled) {
- size_t sum_all_radiance_local_size[2] = {16, 16};
- size_t sum_all_radiance_global_size[2];
- sum_all_radiance_global_size[0] =
- (((d_w - 1) / sum_all_radiance_local_size[0]) + 1) *
- sum_all_radiance_local_size[0];
- sum_all_radiance_global_size[1] =
- (((d_h - 1) / sum_all_radiance_local_size[1]) + 1) *
- sum_all_radiance_local_size[1];
- ENQUEUE_SPLIT_KERNEL(sum_all_radiance,
- sum_all_radiance_global_size,
- sum_all_radiance_local_size);
- }
-
-#undef ENQUEUE_SPLIT_KERNEL
-#undef GLUE
-
- if(numHostIntervention == 0) {
- /* This means that we are executing the kernel more often than required;
- * avoid this for the next sample/tile.
- */
- PathIteration_times = ((numNextPathIterTimes - PATH_ITER_INC_FACTOR) <= 0) ?
- PATH_ITER_INC_FACTOR : numNextPathIterTimes - PATH_ITER_INC_FACTOR;
- }
- else {
- /* The number of path-iterations done for this tile is used as
- * the initial path-iteration count for the next tile.
- */
- PathIteration_times = numNextPathIterTimes;
- }
-
- first_tile = false;
- }
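
The two branches above implement a simple adaptive batch size: if the host never had to
intervene, the next tile uses one increment less; otherwise the accumulated value carries
over. The policy on its own, as a sketch (the helper name is illustrative, not from the
patch):

    #include <cstdio>

    static const unsigned int PATH_ITER_INC_FACTOR_ = 8;   /* matches opencl.h below */

    /* Given how many host interventions a tile needed and the accumulated
     * iteration count, choose the batch size for the next tile. */
    static unsigned int next_path_iteration_times(unsigned int host_interventions,
                                                  unsigned int accumulated_times)
    {
    	if(host_interventions == 0) {
    		/* We ran more kernel batches than necessary; back off by one step. */
    		return (accumulated_times <= PATH_ITER_INC_FACTOR_)
    		        ? PATH_ITER_INC_FACTOR_
    		        : accumulated_times - PATH_ITER_INC_FACTOR_;
    	}
    	return accumulated_times;   /* keep the larger batch for the next tile */
    }

    int main()
    {
    	printf("%u %u\n",
    	       next_path_iteration_times(0, 8),    /* stays at the minimum: 8 */
    	       next_path_iteration_times(2, 24));  /* interventions happened: 24 */
    	return 0;
    }
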
-
- /* Calculates the amount of memory that always has to be
- * allocated in order for the split kernel to function.
- * This memory is tile/scene-property invariant (meaning
- * the value returned by this function does not depend
- * on the user-set tile size or scene properties).
- */
- size_t get_invariable_mem_allocated()
- {
- size_t total_invariable_mem_allocated = 0;
- size_t KernelGlobals_size = 0;
-
- KernelGlobals_size = get_KernelGlobals_size();
-
- total_invariable_mem_allocated += KernelGlobals_size; /* KernelGlobals size */
- total_invariable_mem_allocated += NUM_QUEUES * sizeof(unsigned int); /* Queue index size */
- total_invariable_mem_allocated += sizeof(char); /* use_queues_flag size */
-
- return total_invariable_mem_allocated;
- }
-
- /* Calculate the memory that has to be (or has been) allocated for
- * the split kernel to function.
- */
- size_t get_tile_specific_mem_allocated(const int2 tile_size)
- {
- size_t tile_specific_mem_allocated = 0;
-
- /* Get required tile info */
- unsigned int user_set_tile_w = tile_size.x;
- unsigned int user_set_tile_h = tile_size.y;
-
-#ifdef __WORK_STEALING__
- /* Calculate memory to be allocated for work_pools in
- * case of work_stealing.
- */
- size_t max_global_size[2];
- size_t max_num_work_pools = 0;
- max_global_size[0] =
- (((user_set_tile_w - 1) / SPLIT_KERNEL_LOCAL_SIZE_X) + 1) *
- SPLIT_KERNEL_LOCAL_SIZE_X;
- max_global_size[1] =
- (((user_set_tile_h - 1) / SPLIT_KERNEL_LOCAL_SIZE_Y) + 1) *
- SPLIT_KERNEL_LOCAL_SIZE_Y;
- max_num_work_pools =
- (max_global_size[0] * max_global_size[1]) /
- (SPLIT_KERNEL_LOCAL_SIZE_X * SPLIT_KERNEL_LOCAL_SIZE_Y);
- tile_specific_mem_allocated += max_num_work_pools * sizeof(unsigned int);
-#endif
-
- tile_specific_mem_allocated +=
- user_set_tile_w * user_set_tile_h * per_thread_output_buffer_size;
- tile_specific_mem_allocated +=
- user_set_tile_w * user_set_tile_h * sizeof(RNG);
-
- return tile_specific_mem_allocated;
- }
-
- /* Calculates the texture memories and KernelData (d_data) memory
- * that has been allocated.
- */
- size_t get_scene_specific_mem_allocated(cl_mem d_data)
- {
- size_t scene_specific_mem_allocated = 0;
- /* Calculate texture memories. */
-#define KERNEL_TEX(type, ttype, name) \
- scene_specific_mem_allocated += get_tex_size(#name);
-#include "kernel_textures.h"
-#undef KERNEL_TEX
- size_t d_data_size;
- ciErr = clGetMemObjectInfo(d_data,
- CL_MEM_SIZE,
- sizeof(d_data_size),
- &d_data_size,
- NULL);
- assert(ciErr == CL_SUCCESS && "Can't get d_data mem object info");
- scene_specific_mem_allocated += d_data_size;
- return scene_specific_mem_allocated;
- }
-
- /* Calculate the memory required for one thread in split kernel. */
- size_t get_per_thread_memory()
- {
- size_t shaderdata_size = 0;
- /* TODO(sergey): This will actually over-allocate if
- * a particular kernel does not support multiclosure.
- */
- shaderdata_size = get_shader_data_size(current_max_closure);
- size_t retval = sizeof(RNG)
- + sizeof(float3) /* Throughput size */
- + sizeof(float) /* L transparent size */
- + sizeof(char) /* Ray state size */
- + sizeof(unsigned int) /* Work element size */
- + sizeof(int) /* ISLamp_size */
- + sizeof(PathRadiance) + sizeof(Ray) + sizeof(PathState)
- + sizeof(Intersection) /* Overall isect */
- + sizeof(Intersection) /* Intersection_coop_AO */
- + sizeof(Intersection) /* Intersection coop DL */
- + shaderdata_size /* Overall ShaderData */
- + (shaderdata_size * 2) /* ShaderData : DL and shadow */
- + sizeof(Ray) + sizeof(BsdfEval)
- + sizeof(float3) /* AOAlpha size */
- + sizeof(float3) /* AOBSDF size */
- + sizeof(Ray)
- + (sizeof(int) * NUM_QUEUES)
- + per_thread_output_buffer_size;
- return retval;
- }
-
- /* Considers the total memory available in the device
- * and returns the maximum global work size possible.
- */
- size_t get_feasible_global_work_size(int2 tile_size, cl_mem d_data)
- {
- /* Calculate invariably allocated memory. */
- size_t invariable_mem_allocated = get_invariable_mem_allocated();
- /* Calculate tile specific allocated memory. */
- size_t tile_specific_mem_allocated =
- get_tile_specific_mem_allocated(tile_size);
- /* Calculate scene specific allocated memory. */
- size_t scene_specific_mem_allocated =
- get_scene_specific_mem_allocated(d_data);
- /* Calculate total memory available for the threads in global work size. */
- size_t available_memory = total_allocatable_memory
- - invariable_mem_allocated
- - tile_specific_mem_allocated
- - scene_specific_mem_allocated
- - DATA_ALLOCATION_MEM_FACTOR;
- size_t per_thread_memory_required = get_per_thread_memory();
- return (available_memory / per_thread_memory_required);
- }
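
The feasible global work size is simply the memory left after the invariant, tile-specific
and scene-specific allocations (plus the alignment slack) divided by the per-thread
footprint. A worked example with invented figures, just to show the shape of the
calculation:

    #include <cstdio>
    #include <cstddef>

    int main()
    {
    	/* All figures are made up for illustration. */
    	const size_t total_allocatable  = 1024ull * 1024 * 1024;  /* CL_DEVICE_MAX_MEM_ALLOC_SIZE */
    	const size_t invariable_mem     = 4096;                   /* KernelGlobals + queue index + flag */
    	const size_t tile_specific_mem  = 64ull * 1024 * 1024;    /* output buffer + RNG state for the tile */
    	const size_t scene_specific_mem = 300ull * 1024 * 1024;   /* textures + KernelData */
    	const size_t alignment_slack    = 5000000;                /* DATA_ALLOCATION_MEM_FACTOR */
    	const size_t per_thread_bytes   = 2200;                   /* get_per_thread_memory() */

    	size_t available = total_allocatable - invariable_mem - tile_specific_mem
    	                 - scene_specific_mem - alignment_slack;
    	printf("feasible global work size: %zu threads\n", available / per_thread_bytes);
    	return 0;
    }
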
-
- /* Checks if the device has enough memory to render the whole tile;
- * if not, we should split the single tile into multiple smaller tiles
- * and process them all.
- */
- bool need_to_split_tile(unsigned int d_w,
- unsigned int d_h,
- int2 max_render_feasible_tile_size)
- {
- size_t global_size_estimate[2];
- /* TODO(sergey): Such round-ups are in quite a few places, need to replace
- * them with a utility macro.
- */
- global_size_estimate[0] =
- (((d_w - 1) / SPLIT_KERNEL_LOCAL_SIZE_X) + 1) *
- SPLIT_KERNEL_LOCAL_SIZE_X;
- global_size_estimate[1] =
- (((d_h - 1) / SPLIT_KERNEL_LOCAL_SIZE_Y) + 1) *
- SPLIT_KERNEL_LOCAL_SIZE_Y;
- if((global_size_estimate[0] * global_size_estimate[1]) >
- (max_render_feasible_tile_size.x * max_render_feasible_tile_size.y))
- {
- return true;
- }
- else {
- return false;
- }
- }
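
The (((x - 1) / S) + 1) * S expressions used here and elsewhere in this file are ceiling
round-ups to a multiple of the work-group size. The utility the TODO asks for could look
roughly like this self-contained sketch (round_up is an illustrative name, not part of the
patch):

    #include <cassert>
    #include <cstdio>

    /* Round x up to the next multiple of step; same arithmetic as the inline
     * expressions for global work sizes above (assumes x >= 1, step > 0). */
    static inline unsigned int round_up(unsigned int x, unsigned int step)
    {
    	assert(x > 0 && step > 0);
    	return (((x - 1) / step) + 1) * step;
    }

    int main()
    {
    	/* e.g. a 250x37 tile with a 64x1 local size becomes a 256x37 NDRange */
    	printf("%u %u\n", round_up(250, 64), round_up(37, 1));
    	return 0;
    }
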
-
- /* Considers the scene properties and the global memory available in the
- * device, and returns a rectangular tile dimension (approximately the
- * maximum) that should be rendered by the split kernel.
- */
- int2 get_max_render_feasible_tile_size(size_t feasible_global_work_size)
- {
- int2 max_render_feasible_tile_size;
- int square_root_val = (int)sqrt(feasible_global_work_size);
- max_render_feasible_tile_size.x = square_root_val;
- max_render_feasible_tile_size.y = square_root_val;
- /* Ceil round-off max_render_feasible_tile_size. */
- int2 ceil_render_feasible_tile_size;
- ceil_render_feasible_tile_size.x =
- (((max_render_feasible_tile_size.x - 1) / SPLIT_KERNEL_LOCAL_SIZE_X) + 1) *
- SPLIT_KERNEL_LOCAL_SIZE_X;
- ceil_render_feasible_tile_size.y =
- (((max_render_feasible_tile_size.y - 1) / SPLIT_KERNEL_LOCAL_SIZE_Y) + 1) *
- SPLIT_KERNEL_LOCAL_SIZE_Y;
- if(ceil_render_feasible_tile_size.x * ceil_render_feasible_tile_size.y <=
- feasible_global_work_size)
- {
- return ceil_render_feasible_tile_size;
- }
- /* Floor round-off max_render_feasible_tile_size. */
- int2 floor_render_feasible_tile_size;
- floor_render_feasible_tile_size.x =
- (max_render_feasible_tile_size.x / SPLIT_KERNEL_LOCAL_SIZE_X) *
- SPLIT_KERNEL_LOCAL_SIZE_X;
- floor_render_feasible_tile_size.y =
- (max_render_feasible_tile_size.y / SPLIT_KERNEL_LOCAL_SIZE_Y) *
- SPLIT_KERNEL_LOCAL_SIZE_Y;
- return floor_render_feasible_tile_size;
- }
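
get_max_render_feasible_tile_size() first tries to round the square side up to the
work-group multiple and falls back to rounding down when that would exceed the thread
budget. The same decision as a dependency-free sketch, using the 64x1 local size defined
in opencl.h below and an invented budget:

    #include <cmath>
    #include <cstdio>
    #include <cstddef>

    static const int LOCAL_X = 64, LOCAL_Y = 1;   /* SPLIT_KERNEL_LOCAL_SIZE_X/Y */

    static int ceil_to(int v, int s)  { return (((v - 1) / s) + 1) * s; }
    static int floor_to(int v, int s) { return (v / s) * s; }

    /* Pick an (approximately square) tile whose area does not exceed the number
     * of threads the device memory can feed. */
    static void max_feasible_tile(size_t feasible_threads, int *w, int *h)
    {
    	int side = (int)sqrt((double)feasible_threads);
    	int cw = ceil_to(side, LOCAL_X), ch = ceil_to(side, LOCAL_Y);
    	if((size_t)cw * ch <= feasible_threads) { *w = cw; *h = ch; return; }
    	*w = floor_to(side, LOCAL_X);
    	*h = floor_to(side, LOCAL_Y);
    }

    int main()
    {
    	int w, h;
    	max_feasible_tile(300000, &w, &h);   /* sqrt(300000) ~ 547 -> 512x547 here */
    	printf("%dx%d\n", w, h);
    	return 0;
    }
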
-
- /* Try splitting the current tile into multiple smaller
- * almost-square-tiles.
- */
- int2 get_split_tile_size(RenderTile rtile,
- int2 max_render_feasible_tile_size)
- {
- int2 split_tile_size;
- int num_global_threads = max_render_feasible_tile_size.x *
- max_render_feasible_tile_size.y;
- int d_w = rtile.w;
- int d_h = rtile.h;
- /* Ceil round off d_w and d_h */
- d_w = (((d_w - 1) / SPLIT_KERNEL_LOCAL_SIZE_X) + 1) *
- SPLIT_KERNEL_LOCAL_SIZE_X;
- d_h = (((d_h - 1) / SPLIT_KERNEL_LOCAL_SIZE_Y) + 1) *
- SPLIT_KERNEL_LOCAL_SIZE_Y;
- while(d_w * d_h > num_global_threads) {
- /* Halve the longer dimension. */
- if(d_w >= d_h) {
- d_w = d_w / 2;
- d_w = (((d_w - 1) / SPLIT_KERNEL_LOCAL_SIZE_X) + 1) *
- SPLIT_KERNEL_LOCAL_SIZE_X;
- }
- else {
- d_h = d_h / 2;
- d_h = (((d_h - 1) / SPLIT_KERNEL_LOCAL_SIZE_Y) + 1) *
- SPLIT_KERNEL_LOCAL_SIZE_Y;
- }
- }
- split_tile_size.x = d_w;
- split_tile_size.y = d_h;
- return split_tile_size;
- }
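
The loop above repeatedly halves the longer side (re-rounded to the work-group multiple)
until the tile fits the available number of threads. A stripped-down sketch of the same
loop, with a fixed 64x1 local size and an invented thread budget:

    #include <cstdio>

    static int ceil_to(int v, int s) { return (((v - 1) / s) + 1) * s; }

    static void split_tile_size(int w, int h, int max_threads, int *out_w, int *out_h)
    {
    	const int LX = 64, LY = 1;                 /* SPLIT_KERNEL_LOCAL_SIZE_X/Y */
    	w = ceil_to(w, LX);
    	h = ceil_to(h, LY);
    	while(w * h > max_threads) {               /* halve the longer side until it fits */
    		if(w >= h) w = ceil_to(w / 2, LX);
    		else       h = ceil_to(h / 2, LY);
    	}
    	*out_w = w; *out_h = h;
    }

    int main()
    {
    	int w, h;
    	split_tile_size(1920, 1080, 280064, &w, &h);
    	printf("split tile: %dx%d\n", w, h);       /* prints 512x540 for this budget */
    	return 0;
    }
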
-
- /* Splits existing tile into multiple tiles of tile size split_tile_size. */
- vector<SplitRenderTile> split_tiles(RenderTile rtile, int2 split_tile_size)
- {
- vector<SplitRenderTile> to_path_trace_rtile;
- int d_w = rtile.w;
- int d_h = rtile.h;
- int num_tiles_x = (((d_w - 1) / split_tile_size.x) + 1);
- int num_tiles_y = (((d_h - 1) / split_tile_size.y) + 1);
- /* Buffer and rng_state offset calc. */
- size_t offset_index = rtile.offset + (rtile.x + rtile.y * rtile.stride);
- size_t offset_x = offset_index % rtile.stride;
- size_t offset_y = offset_index / rtile.stride;
- /* Resize to_path_trace_rtile. */
- to_path_trace_rtile.resize(num_tiles_x * num_tiles_y);
- for(int tile_iter_y = 0; tile_iter_y < num_tiles_y; tile_iter_y++) {
- for(int tile_iter_x = 0; tile_iter_x < num_tiles_x; tile_iter_x++) {
- int rtile_index = tile_iter_y * num_tiles_x + tile_iter_x;
- to_path_trace_rtile[rtile_index].rng_state_offset_x = offset_x + tile_iter_x * split_tile_size.x;
- to_path_trace_rtile[rtile_index].rng_state_offset_y = offset_y + tile_iter_y * split_tile_size.y;
- to_path_trace_rtile[rtile_index].buffer_offset_x = offset_x + tile_iter_x * split_tile_size.x;
- to_path_trace_rtile[rtile_index].buffer_offset_y = offset_y + tile_iter_y * split_tile_size.y;
- to_path_trace_rtile[rtile_index].start_sample = rtile.start_sample;
- to_path_trace_rtile[rtile_index].num_samples = rtile.num_samples;
- to_path_trace_rtile[rtile_index].sample = rtile.sample;
- to_path_trace_rtile[rtile_index].resolution = rtile.resolution;
- to_path_trace_rtile[rtile_index].offset = rtile.offset;
- to_path_trace_rtile[rtile_index].buffers = rtile.buffers;
- to_path_trace_rtile[rtile_index].buffer = rtile.buffer;
- to_path_trace_rtile[rtile_index].rng_state = rtile.rng_state;
- to_path_trace_rtile[rtile_index].x = rtile.x + (tile_iter_x * split_tile_size.x);
- to_path_trace_rtile[rtile_index].y = rtile.y + (tile_iter_y * split_tile_size.y);
- to_path_trace_rtile[rtile_index].buffer_rng_state_stride = rtile.stride;
- /* Fill width and height of the new render tile. */
- to_path_trace_rtile[rtile_index].w = (tile_iter_x == (num_tiles_x - 1)) ?
- (d_w - (tile_iter_x * split_tile_size.x)) /* Border tile */
- : split_tile_size.x;
- to_path_trace_rtile[rtile_index].h = (tile_iter_y == (num_tiles_y - 1)) ?
- (d_h - (tile_iter_y * split_tile_size.y)) /* Border tile */
- : split_tile_size.y;
- to_path_trace_rtile[rtile_index].stride = to_path_trace_rtile[rtile_index].w;
- }
- }
- return to_path_trace_rtile;
- }
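
split_tiles() lays the sub-tiles out row-major; only the last column and row get whatever
width/height is left over. The border arithmetic in isolation, with invented tile
dimensions:

    #include <cstdio>

    int main()
    {
    	const int tile_w = 1000, tile_h = 700;     /* the acquired RenderTile */
    	const int split_w = 512,  split_h = 540;   /* result of get_split_tile_size() */
    	int num_x = ((tile_w - 1) / split_w) + 1;  /* 2 columns */
    	int num_y = ((tile_h - 1) / split_h) + 1;  /* 2 rows */
    	for(int ty = 0; ty < num_y; ty++) {
    		for(int tx = 0; tx < num_x; tx++) {
    			int w = (tx == num_x - 1) ? tile_w - tx * split_w : split_w;  /* border column */
    			int h = (ty == num_y - 1) ? tile_h - ty * split_h : split_h;  /* border row */
    			printf("sub-tile (%d,%d): %dx%d\n", tx, ty, w, h);
    		}
    	}
    	return 0;
    }
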
-
- void thread_run(DeviceTask *task)
- {
- if(task->type == DeviceTask::FILM_CONVERT) {
- film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half);
- }
- else if(task->type == DeviceTask::SHADER) {
- shader(*task);
- }
- else if(task->type == DeviceTask::PATH_TRACE) {
- RenderTile tile;
- bool initialize_data_and_check_render_feasibility = false;
- bool need_to_split_tiles_further = false;
- int2 max_render_feasible_tile_size;
- size_t feasible_global_work_size;
- const int2 tile_size = task->requested_tile_size;
- /* Keep rendering tiles until done. */
- while(task->acquire_tile(this, tile)) {
- if(!initialize_data_and_check_render_feasibility) {
- /* Initialize data. */
- /* Calculate per_thread_output_buffer_size. */
- size_t output_buffer_size = 0;
- ciErr = clGetMemObjectInfo((cl_mem)tile.buffer,
- CL_MEM_SIZE,
- sizeof(output_buffer_size),
- &output_buffer_size,
- NULL);
- assert(ciErr == CL_SUCCESS && "Can't get tile.buffer mem object info");
- /* This value is different when running on AMD and NV. */
- if(background) {
- /* In offline render the number of buffer elements
- * associated with tile.buffer is the current tile size.
- */
- per_thread_output_buffer_size =
- output_buffer_size / (tile.w * tile.h);
- }
- else {
- /* In interactive rendering, unlike offline render, the number of buffer
- * elements associated with tile.buffer is the entire viewport size.
- */
- per_thread_output_buffer_size =
- output_buffer_size / (tile.buffers->params.width *
- tile.buffers->params.height);
- }
- /* Check render feasibility. */
- feasible_global_work_size = get_feasible_global_work_size(
- tile_size,
- CL_MEM_PTR(const_mem_map["__data"]->device_pointer));
- max_render_feasible_tile_size =
- get_max_render_feasible_tile_size(
- feasible_global_work_size);
- need_to_split_tiles_further =
- need_to_split_tile(tile_size.x,
- tile_size.y,
- max_render_feasible_tile_size);
- initialize_data_and_check_render_feasibility = true;
- }
- if(need_to_split_tiles_further) {
- int2 split_tile_size =
- get_split_tile_size(tile,
- max_render_feasible_tile_size);
- vector<SplitRenderTile> to_path_trace_render_tiles =
- split_tiles(tile, split_tile_size);
- /* Print message to console */
- if(background && (to_path_trace_render_tiles.size() > 1)) {
- fprintf(stderr, "Message : Tiles need to be split "
- "further inside path trace (due to insufficient "
- "device-global-memory for split kernel to "
- "function) \n"
- "The current tile of dimensions %dx%d is split "
- "into tiles of dimension %dx%d for render \n",
- tile.w, tile.h,
- split_tile_size.x,
- split_tile_size.y);
- }
- /* Process all split tiles. */
- for(int tile_iter = 0;
- tile_iter < to_path_trace_render_tiles.size();
- ++tile_iter)
- {
- path_trace(task,
- to_path_trace_render_tiles[tile_iter],
- max_render_feasible_tile_size);
- }
- }
- else {
- /* No splitting required; process the entire tile at once. */
- /* Render feasible tile size is user-set-tile-size itself. */
- max_render_feasible_tile_size.x =
- (((tile_size.x - 1) / SPLIT_KERNEL_LOCAL_SIZE_X) + 1) *
- SPLIT_KERNEL_LOCAL_SIZE_X;
- max_render_feasible_tile_size.y =
- (((tile_size.y - 1) / SPLIT_KERNEL_LOCAL_SIZE_Y) + 1) *
- SPLIT_KERNEL_LOCAL_SIZE_Y;
- /* buffer_rng_state_stride is stride itself. */
- SplitRenderTile split_tile(tile);
- split_tile.buffer_rng_state_stride = tile.stride;
- path_trace(task, split_tile, max_render_feasible_tile_size);
- }
- tile.sample = tile.start_sample + tile.num_samples;
-
- /* Complete kernel execution before release tile. */
- /* This helps in multi-device render;
- * The device that reaches the critical-section function
- * release_tile waits (stalling other devices from entering
- * release_tile) for all kernels to complete. If device1 (a
- * slow-render device) reaches release_tile first then it would
- * stall device2 (a fast-render device) from proceeding to render
- * next tile.
- */
- clFinish(cqCommandQueue);
-
- task->release_tile(tile);
- }
- }
- }
-
-protected:
- cl_mem mem_alloc(size_t bufsize, cl_mem_flags mem_flag = CL_MEM_READ_WRITE)
- {
- cl_mem ptr;
- assert(bufsize != 0);
- ptr = clCreateBuffer(cxContext, mem_flag, bufsize, NULL, &ciErr);
- opencl_assert_err(ciErr, "clCreateBuffer");
- return ptr;
- }
-
- /* ** Those guys are for working around some compiler-specific bugs ** */
-
- cl_program load_cached_kernel(
- const DeviceRequestedFeatures& /*requested_features*/,
- OpenCLCache::ProgramName /*program_name*/,
- thread_scoped_lock /*cache_locker*/)
- {
- VLOG(2) << "Skip loading kernel from cache, "
- << "not supported by split kernel.";
- return NULL;
- }
+#include "device_intern.h"
- void store_cached_kernel(cl_platform_id /*platform*/,
- cl_device_id /*device*/,
- cl_program /*program*/,
- OpenCLCache::ProgramName /*program_name*/,
- thread_scoped_lock& /*slot_locker*/)
- {
- VLOG(2) << "Skip storing kernel in cache, "
- << "not supported by split kernel.";
- }
+#include "util_foreach.h"
+#include "util_logging.h"
- string build_options_for_base_program(
- const DeviceRequestedFeatures& requested_features)
- {
- return requested_features.get_build_options();
- }
-};
+CCL_NAMESPACE_BEGIN
Device *device_opencl_create(DeviceInfo& info, Stats &stats, bool background)
{
vector<OpenCLPlatformDevice> usable_devices;
- opencl_get_usable_devices(&usable_devices);
+ OpenCLInfo::get_usable_devices(&usable_devices);
assert(info.num < usable_devices.size());
const OpenCLPlatformDevice& platform_device = usable_devices[info.num];
const string& platform_name = platform_device.platform_name;
const cl_device_type device_type = platform_device.device_type;
- if(opencl_kernel_use_split(platform_name, device_type)) {
+ if(OpenCLInfo::kernel_use_split(platform_name, device_type)) {
VLOG(1) << "Using split kernel.";
- return new OpenCLDeviceSplitKernel(info, stats, background);
+ return opencl_create_split_device(info, stats, background);
} else {
VLOG(1) << "Using mega kernel.";
- return new OpenCLDeviceMegaKernel(info, stats, background);
+ return opencl_create_mega_device(info, stats, background);
}
}
@@ -3298,7 +52,7 @@ bool device_opencl_init(void)
initialized = true;
- if(opencl_device_type() != 0) {
+ if(OpenCLInfo::device_type() != 0) {
int clew_result = clewInit();
if(clew_result == CLEW_SUCCESS) {
VLOG(1) << "CLEW initialization succeeded.";
@@ -3322,24 +76,29 @@ bool device_opencl_init(void)
void device_opencl_info(vector<DeviceInfo>& devices)
{
vector<OpenCLPlatformDevice> usable_devices;
- opencl_get_usable_devices(&usable_devices);
+ OpenCLInfo::get_usable_devices(&usable_devices);
/* Devices are numbered consecutively across platforms. */
int num_devices = 0;
foreach(OpenCLPlatformDevice& platform_device, usable_devices) {
const string& platform_name = platform_device.platform_name;
const cl_device_type device_type = platform_device.device_type;
const string& device_name = platform_device.device_name;
+ string hardware_id = platform_device.hardware_id;
+ if(hardware_id == "") {
+ hardware_id = string_printf("ID_%d", num_devices);
+ }
+
DeviceInfo info;
info.type = DEVICE_OPENCL;
info.description = string_remove_trademark(string(device_name));
info.num = num_devices;
- info.id = string_printf("OPENCL_%d", info.num);
/* We don't know if it's used for display, but assume it is. */
info.display_device = true;
- info.advanced_shading = opencl_kernel_use_advanced_shading(platform_name);
+ info.advanced_shading = OpenCLInfo::kernel_use_advanced_shading(platform_name);
info.pack_images = true;
- info.use_split_kernel = opencl_kernel_use_split(platform_name,
- device_type);
+ info.use_split_kernel = OpenCLInfo::kernel_use_split(platform_name,
+ device_type);
+ info.id = string("OPENCL_") + platform_name + "_" + device_name + "_" + hardware_id;
devices.push_back(info);
num_devices++;
}
@@ -3347,7 +106,7 @@ void device_opencl_info(vector<DeviceInfo>& devices)
string device_opencl_capabilities(void)
{
- if(opencl_device_type() == 0) {
+ if(OpenCLInfo::device_type() == 0) {
return "All OpenCL devices are forced to be OFF";
}
string result = "";
diff --git a/intern/cycles/device/device_task.cpp b/intern/cycles/device/device_task.cpp
index 1f1128a..48d1803 100644
--- a/intern/cycles/device/device_task.cpp
+++ b/intern/cycles/device/device_task.cpp
@@ -19,6 +19,8 @@
#include "device_task.h"
+#include "buffers.h"
+
#include "util_algorithm.h"
#include "util_time.h"
@@ -99,14 +101,18 @@ void DeviceTask::split(list<DeviceTask>& tasks, int num, int max_size)
}
}
-void DeviceTask::update_progress(RenderTile *rtile)
+void DeviceTask::update_progress(RenderTile *rtile, int pixel_samples)
{
if((type != PATH_TRACE) &&
(type != SHADER))
return;
- if(update_progress_sample)
- update_progress_sample();
+ if(update_progress_sample) {
+ if(pixel_samples == -1) {
+ pixel_samples = shader_w;
+ }
+ update_progress_sample(pixel_samples, rtile? rtile->sample : 0);
+ }
if(update_tile_sample) {
double current_time = time_dt();
diff --git a/intern/cycles/device/device_task.h b/intern/cycles/device/device_task.h
index 8423e83..8bd54c3 100644
--- a/intern/cycles/device/device_task.h
+++ b/intern/cycles/device/device_task.h
@@ -56,10 +56,10 @@ public:
int get_subtask_count(int num, int max_size = 0);
void split(list<DeviceTask>& tasks, int num, int max_size = 0);
- void update_progress(RenderTile *rtile);
+ void update_progress(RenderTile *rtile, int pixel_samples = -1);
function<bool(Device *device, RenderTile&)> acquire_tile;
- function<void(void)> update_progress_sample;
+ function<void(long, int)> update_progress_sample;
function<void(RenderTile&)> update_tile_sample;
function<void(RenderTile&)> release_tile;
function<bool(void)> get_cancel;
diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h
new file mode 100644
index 0000000..4023ba8
--- /dev/null
+++ b/intern/cycles/device/opencl/opencl.h
@@ -0,0 +1,408 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef WITH_OPENCL
+
+#include "device.h"
+
+#include "util_map.h"
+#include "util_param.h"
+#include "util_string.h"
+
+#include "clew.h"
+
+CCL_NAMESPACE_BEGIN
+
+#define CL_MEM_PTR(p) ((cl_mem)(uintptr_t)(p))
+
+/* Macro declarations used with split kernel */
+
+/* Macro to enable/disable work-stealing */
+#define __WORK_STEALING__
+
+#define SPLIT_KERNEL_LOCAL_SIZE_X 64
+#define SPLIT_KERNEL_LOCAL_SIZE_Y 1
+
+/* This value may be tuned according to the scene we are rendering.
+ *
+ * Modifying the PATH_ITER_INC_FACTOR value in proportion to the number of
+ * expected ray-bounces will improve performance.
+ */
+#define PATH_ITER_INC_FACTOR 8
+
+/* We allocate global memory in chunks, and may not be able to use
+ * exactly "CL_DEVICE_MAX_MEM_ALLOC_SIZE" bytes, since some bytes may
+ * be needed for aligning the chunks of memory.
+ * This is the amount of memory that we dedicate for that purpose.
+ */
+#define DATA_ALLOCATION_MEM_FACTOR 5000000 //5MB
+
+struct OpenCLPlatformDevice {
+ OpenCLPlatformDevice(cl_platform_id platform_id,
+ const string& platform_name,
+ cl_device_id device_id,
+ cl_device_type device_type,
+ const string& device_name,
+ const string& hardware_id)
+ : platform_id(platform_id),
+ platform_name(platform_name),
+ device_id(device_id),
+ device_type(device_type),
+ device_name(device_name),
+ hardware_id(hardware_id) {}
+ cl_platform_id platform_id;
+ string platform_name;
+ cl_device_id device_id;
+ cl_device_type device_type;
+ string device_name;
+ string hardware_id;
+};
+
+/* Contains all static OpenCL helper functions. */
+class OpenCLInfo
+{
+public:
+ static cl_device_type device_type();
+ static bool use_debug();
+ static bool kernel_use_advanced_shading(const string& platform_name);
+ static bool kernel_use_split(const string& platform_name,
+ const cl_device_type device_type);
+ static bool device_supported(const string& platform_name,
+ const cl_device_id device_id);
+ static bool platform_version_check(cl_platform_id platform,
+ string *error = NULL);
+ static bool device_version_check(cl_device_id device,
+ string *error = NULL);
+ static string get_hardware_id(string platform_name,
+ cl_device_id device_id);
+ static void get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices,
+ bool force_all = false);
+};
+
+/* Thread safe cache for contexts and programs.
+ */
+class OpenCLCache
+{
+ struct Slot
+ {
+ struct ProgramEntry
+ {
+ ProgramEntry();
+ ProgramEntry(const ProgramEntry& rhs);
+ ~ProgramEntry();
+ cl_program program;
+ thread_mutex *mutex;
+ };
+
+ Slot();
+ Slot(const Slot& rhs);
+ ~Slot();
+
+ thread_mutex *context_mutex;
+ cl_context context;
+ typedef map<ustring, ProgramEntry> EntryMap;
+ EntryMap programs;
+
+ };
+
+ /* key is combination of platform ID and device ID */
+ typedef pair<cl_platform_id, cl_device_id> PlatformDevicePair;
+
+ /* map of Slot objects */
+ typedef map<PlatformDevicePair, Slot> CacheMap;
+ CacheMap cache;
+
+ /* MD5 hash of the kernel source. */
+ string kernel_md5;
+
+ thread_mutex cache_lock;
+ thread_mutex kernel_md5_lock;
+
+ /* lazy instantiate */
+ static OpenCLCache& global_instance();
+
+public:
+
+ enum ProgramName {
+ OCL_DEV_BASE_PROGRAM,
+ OCL_DEV_MEGAKERNEL_PROGRAM,
+ };
+
+ /* Lookup context in the cache. If this returns NULL, slot_locker
+ * will be holding a lock for the cache. slot_locker should refer to a
+ * default constructed thread_scoped_lock. */
+ static cl_context get_context(cl_platform_id platform,
+ cl_device_id device,
+ thread_scoped_lock& slot_locker);
+ /* Same as above. */
+ static cl_program get_program(cl_platform_id platform,
+ cl_device_id device,
+ ustring key,
+ thread_scoped_lock& slot_locker);
+
+ /* Store context in the cache. You MUST have tried to get the item before storing to it. */
+ static void store_context(cl_platform_id platform,
+ cl_device_id device,
+ cl_context context,
+ thread_scoped_lock& slot_locker);
+ /* Same as above. */
+ static void store_program(cl_platform_id platform,
+ cl_device_id device,
+ cl_program program,
+ ustring key,
+ thread_scoped_lock& slot_locker);
+
+ static string get_kernel_md5();
+};
+
+#define opencl_assert(stmt) \
+ { \
+ cl_int err = stmt; \
+ \
+ if(err != CL_SUCCESS) { \
+ string message = string_printf("OpenCL error: %s in %s", clewErrorString(err), #stmt); \
+ if(error_msg == "") \
+ error_msg = message; \
+ fprintf(stderr, "%s\n", message.c_str()); \
+ } \
+ } (void)0
+
+class OpenCLDeviceBase : public Device
+{
+public:
+ DedicatedTaskPool task_pool;
+ cl_context cxContext;
+ cl_command_queue cqCommandQueue;
+ cl_platform_id cpPlatform;
+ cl_device_id cdDevice;
+ cl_int ciErr;
+
+ class OpenCLProgram {
+ public:
+ OpenCLProgram() : loaded(false), device(NULL) {}
+ OpenCLProgram(OpenCLDeviceBase *device,
+ string program_name,
+ string kernel_name,
+ string kernel_build_options,
+ bool use_stdout = true);
+ ~OpenCLProgram();
+
+ void add_kernel(ustring name);
+ void load();
+
+ bool is_loaded() { return loaded; }
+ string get_log() { return log; }
+ void report_error();
+
+ cl_kernel operator()();
+ cl_kernel operator()(ustring name);
+
+ void release();
+
+ private:
+ bool build_kernel(const string *debug_src);
+ bool compile_kernel(const string *debug_src);
+ bool load_binary(const string& clbin, const string *debug_src = NULL);
+ bool save_binary(const string& clbin);
+
+ void add_log(string msg, bool is_debug);
+ void add_error(string msg);
+
+ bool loaded;
+ cl_program program;
+ OpenCLDeviceBase *device;
+
+ /* Used for the OpenCLCache key. */
+ string program_name;
+
+ string kernel_file, kernel_build_options, device_md5;
+
+ bool use_stdout;
+ string log, error_msg;
+ string compile_output;
+
+ map<ustring, cl_kernel> kernels;
+ };
+
+ OpenCLProgram base_program;
+
+ typedef map<string, device_vector<uchar>*> ConstMemMap;
+ typedef map<string, device_ptr> MemMap;
+
+ ConstMemMap const_mem_map;
+ MemMap mem_map;
+ device_ptr null_mem;
+
+ bool device_initialized;
+ string platform_name;
+
+ bool opencl_error(cl_int err);
+ void opencl_error(const string& message);
+ void opencl_assert_err(cl_int err, const char* where);
+
+ OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool background_);
+ ~OpenCLDeviceBase();
+
+ static void CL_CALLBACK context_notify_callback(const char *err_info,
+ const void * /*private_info*/, size_t /*cb*/, void *user_data);
+
+ bool opencl_version_check();
+
+ string device_md5_hash(string kernel_custom_build_options = "");
+ bool load_kernels(const DeviceRequestedFeatures& requested_features);
+
+ /* Has to be implemented by the real device classes.
+ * The base device will then load all these programs. */
+ virtual void load_kernels(const DeviceRequestedFeatures& requested_features,
+ vector<OpenCLProgram*> &programs) = 0;
+
+ void mem_alloc(device_memory& mem, MemoryType type);
+ void mem_copy_to(device_memory& mem);
+ void mem_copy_from(device_memory& mem, int y, int w, int h, int elem);
+ void mem_zero(device_memory& mem);
+ void mem_free(device_memory& mem);
+ void const_copy_to(const char *name, void *host, size_t size);
+ void tex_alloc(const char *name,
+ device_memory& mem,
+ InterpolationType /*interpolation*/,
+ ExtensionType /*extension*/);
+ void tex_free(device_memory& mem);
+
+ size_t global_size_round_up(int group_size, int global_size);
+ void enqueue_kernel(cl_kernel kernel, size_t w, size_t h);
+ void set_kernel_arg_mem(cl_kernel kernel, cl_uint *narg, const char *name);
+
+ void film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half);
+ void shader(DeviceTask& task);
+
+ class OpenCLDeviceTask : public DeviceTask {
+ public:
+ OpenCLDeviceTask(OpenCLDeviceBase *device, DeviceTask& task)
+ : DeviceTask(task)
+ {
+ run = function_bind(&OpenCLDeviceBase::thread_run,
+ device,
+ this);
+ }
+ };
+
+ int get_split_task_count(DeviceTask& /*task*/)
+ {
+ return 1;
+ }
+
+ void task_add(DeviceTask& task)
+ {
+ task_pool.push(new OpenCLDeviceTask(this, task));
+ }
+
+ void task_wait()
+ {
+ task_pool.wait();
+ }
+
+ void task_cancel()
+ {
+ task_pool.cancel();
+ }
+
+ virtual void thread_run(DeviceTask * /*task*/) = 0;
+
+protected:
+ string kernel_build_options(const string *debug_src = NULL);
+
+ class ArgumentWrapper {
+ public:
+ ArgumentWrapper() : size(0), pointer(NULL) {}
+ template <typename T>
+ ArgumentWrapper(T& argument) : size(sizeof(argument)),
+ pointer(&argument) { }
+ ArgumentWrapper(int argument) : size(sizeof(int)),
+ int_value(argument),
+ pointer(&int_value) { }
+ ArgumentWrapper(float argument) : size(sizeof(float)),
+ float_value(argument),
+ pointer(&float_value) { }
+ size_t size;
+ int int_value;
+ float float_value;
+ void *pointer;
+ };
+
+ /* TODO(sergey): In the future we can use variadic templates, once
+ * C++0x is allowed. That should allow cleaning this up a bit.
+ */
+ int kernel_set_args(cl_kernel kernel,
+ int start_argument_index,
+ const ArgumentWrapper& arg1 = ArgumentWrapper(),
+ const ArgumentWrapper& arg2 = ArgumentWrapper(),
+ const ArgumentWrapper& arg3 = ArgumentWrapper(),
+ const ArgumentWrapper& arg4 = ArgumentWrapper(),
+ const ArgumentWrapper& arg5 = ArgumentWrapper(),
+ const ArgumentWrapper& arg6 = ArgumentWrapper(),
+ const ArgumentWrapper& arg7 = ArgumentWrapper(),
+ const ArgumentWrapper& arg8 = ArgumentWrapper(),
+ const ArgumentWrapper& arg9 = ArgumentWrapper(),
+ const ArgumentWrapper& arg10 = ArgumentWrapper(),
+ const ArgumentWrapper& arg11 = ArgumentWrapper(),
+ const ArgumentWrapper& arg12 = ArgumentWrapper(),
+ const ArgumentWrapper& arg13 = ArgumentWrapper(),
+ const ArgumentWrapper& arg14 = ArgumentWrapper(),
+ const ArgumentWrapper& arg15 = ArgumentWrapper(),
+ const ArgumentWrapper& arg16 = ArgumentWrapper(),
+ const ArgumentWrapper& arg17 = ArgumentWrapper(),
+ const ArgumentWrapper& arg18 = ArgumentWrapper(),
+ const ArgumentWrapper& arg19 = ArgumentWrapper(),
+ const ArgumentWrapper& arg20 = ArgumentWrapper(),
+ const ArgumentWrapper& arg21 = ArgumentWrapper(),
+ const ArgumentWrapper& arg22 = ArgumentWrapper(),
+ const ArgumentWrapper& arg23 = ArgumentWrapper(),
+ const ArgumentWrapper& arg24 = ArgumentWrapper(),
+ const ArgumentWrapper& arg25 = ArgumentWrapper(),
+ const ArgumentWrapper& arg26 = ArgumentWrapper(),
+ const ArgumentWrapper& arg27 = ArgumentWrapper(),
+ const ArgumentWrapper& arg28 = ArgumentWrapper(),
+ const ArgumentWrapper& arg29 = ArgumentWrapper(),
+ const ArgumentWrapper& arg30 = ArgumentWrapper(),
+ const ArgumentWrapper& arg31 = ArgumentWrapper(),
+ const ArgumentWrapper& arg32 = ArgumentWrapper(),
+ const ArgumentWrapper& arg33 = ArgumentWrapper());
+
+ void release_kernel_safe(cl_kernel kernel);
+ void release_mem_object_safe(cl_mem mem);
+ void release_program_safe(cl_program program);
+
+ /* ** These are for working around some compiler-specific bugs ** */
+
+ virtual cl_program load_cached_kernel(
+ ustring key,
+ thread_scoped_lock& cache_locker);
+
+ virtual void store_cached_kernel(
+ cl_program program,
+ ustring key,
+ thread_scoped_lock& cache_locker);
+
+ virtual string build_options_for_base_program(
+ const DeviceRequestedFeatures& /*requested_features*/);
+};
+
+Device *opencl_create_mega_device(DeviceInfo& info, Stats& stats, bool background);
+Device *opencl_create_split_device(DeviceInfo& info, Stats& stats, bool background);
+
+CCL_NAMESPACE_END
+
+#endif
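
The OpenCLCache::get_context()/get_program() contract documented above hands a held lock back to the caller on a cache miss, so the caller can build the object and store it without re-acquiring the lock. Below is a minimal self-contained sketch of that protocol using the standard library rather than Cycles' thread wrappers; ValueCache, lookup, store and get_or_create are illustrative names only and are not part of this patch.

#include <map>
#include <mutex>
#include <string>

/* Illustrative cache: lookup() returns 0 on a miss and leaves 'locker'
 * holding the slot mutex, so the caller can create the value and call
 * store() while still protected -- the same contract documented for
 * OpenCLCache::get_context()/store_context(). */
class ValueCache {
	std::map<std::string, int> slots;
	std::mutex slot_mutex;

public:
	int lookup(const std::string& key, std::unique_lock<std::mutex>& locker)
	{
		locker = std::unique_lock<std::mutex>(slot_mutex);
		std::map<std::string, int>::iterator it = slots.find(key);
		if(it == slots.end()) {
			return 0;  /* Miss: 'locker' stays held for the caller. */
		}
		int value = it->second;
		locker.unlock();  /* Hit: release before returning. */
		return value;
	}

	void store(const std::string& key, int value, std::unique_lock<std::mutex>& locker)
	{
		/* The caller must have called lookup() first, so 'locker' is held. */
		slots[key] = value;
		locker.unlock();
	}
};

int get_or_create(ValueCache& cache, const std::string& key)
{
	std::unique_lock<std::mutex> locker;  /* Default-constructed, not locked. */
	int value = cache.lookup(key, locker);
	if(value == 0) {
		value = 42;  /* Expensive creation happens while the lock is held. */
		cache.store(key, value, locker);
	}
	return value;
}

int main()
{
	ValueCache cache;
	return get_or_create(cache, "kernel") == 42 ? 0 : 1;
}

The default-constructed lock object passed in by the caller is what lets lookup() decide whether to keep the mutex held across the return, mirroring the default-constructed thread_scoped_lock required by the header above.
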
diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp
new file mode 100644
index 0000000..a2b9003
--- /dev/null
+++ b/intern/cycles/device/opencl/opencl_base.cpp
@@ -0,0 +1,741 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef WITH_OPENCL
+
+#include "opencl.h"
+
+#include "kernel_types.h"
+
+#include "util_foreach.h"
+#include "util_logging.h"
+#include "util_md5.h"
+#include "util_path.h"
+#include "util_time.h"
+
+CCL_NAMESPACE_BEGIN
+
+bool OpenCLDeviceBase::opencl_error(cl_int err)
+{
+ if(err != CL_SUCCESS) {
+ string message = string_printf("OpenCL error (%d): %s", err, clewErrorString(err));
+ if(error_msg == "")
+ error_msg = message;
+ fprintf(stderr, "%s\n", message.c_str());
+ return true;
+ }
+
+ return false;
+}
+
+void OpenCLDeviceBase::opencl_error(const string& message)
+{
+ if(error_msg == "")
+ error_msg = message;
+ fprintf(stderr, "%s\n", message.c_str());
+}
+
+void OpenCLDeviceBase::opencl_assert_err(cl_int err, const char* where)
+{
+ if(err != CL_SUCCESS) {
+ string message = string_printf("OpenCL error (%d): %s in %s", err, clewErrorString(err), where);
+ if(error_msg == "")
+ error_msg = message;
+ fprintf(stderr, "%s\n", message.c_str());
+#ifndef NDEBUG
+ abort();
+#endif
+ }
+}
+
+OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool background_)
+: Device(info, stats, background_)
+{
+ cpPlatform = NULL;
+ cdDevice = NULL;
+ cxContext = NULL;
+ cqCommandQueue = NULL;
+ null_mem = 0;
+ device_initialized = false;
+
+ vector<OpenCLPlatformDevice> usable_devices;
+ OpenCLInfo::get_usable_devices(&usable_devices);
+ if(usable_devices.size() == 0) {
+ opencl_error("OpenCL: no devices found.");
+ return;
+ }
+ assert(info.num < usable_devices.size());
+ OpenCLPlatformDevice& platform_device = usable_devices[info.num];
+ cpPlatform = platform_device.platform_id;
+ cdDevice = platform_device.device_id;
+ platform_name = platform_device.platform_name;
+ VLOG(2) << "Creating new Cycles device for OpenCL platform "
+ << platform_name << ", device "
+ << platform_device.device_name << ".";
+
+ {
+ /* try to use cached context */
+ thread_scoped_lock cache_locker;
+ cxContext = OpenCLCache::get_context(cpPlatform, cdDevice, cache_locker);
+
+ if(cxContext == NULL) {
+ /* create context properties array to specify platform */
+ const cl_context_properties context_props[] = {
+ CL_CONTEXT_PLATFORM, (cl_context_properties)cpPlatform,
+ 0, 0
+ };
+
+ /* create context */
+ cxContext = clCreateContext(context_props, 1, &cdDevice,
+ context_notify_callback, cdDevice, &ciErr);
+
+ if(opencl_error(ciErr)) {
+ opencl_error("OpenCL: clCreateContext failed");
+ return;
+ }
+
+ /* cache it */
+ OpenCLCache::store_context(cpPlatform, cdDevice, cxContext, cache_locker);
+ }
+ }
+
+ cqCommandQueue = clCreateCommandQueue(cxContext, cdDevice, 0, &ciErr);
+ if(opencl_error(ciErr))
+ return;
+
+ null_mem = (device_ptr)clCreateBuffer(cxContext, CL_MEM_READ_ONLY, 1, NULL, &ciErr);
+ if(opencl_error(ciErr))
+ return;
+
+ fprintf(stderr, "Device init success\n");
+ device_initialized = true;
+}
+
+OpenCLDeviceBase::~OpenCLDeviceBase()
+{
+ task_pool.stop();
+
+ if(null_mem)
+ clReleaseMemObject(CL_MEM_PTR(null_mem));
+
+ ConstMemMap::iterator mt;
+ for(mt = const_mem_map.begin(); mt != const_mem_map.end(); mt++) {
+ mem_free(*(mt->second));
+ delete mt->second;
+ }
+
+ base_program.release();
+ if(cqCommandQueue)
+ clReleaseCommandQueue(cqCommandQueue);
+ if(cxContext)
+ clReleaseContext(cxContext);
+}
+
+void CL_CALLBACK OpenCLDeviceBase::context_notify_callback(const char *err_info,
+ const void * /*private_info*/, size_t /*cb*/, void *user_data)
+{
+ char name[256];
+ clGetDeviceInfo((cl_device_id)user_data, CL_DEVICE_NAME, sizeof(name), &name, NULL);
+
+ fprintf(stderr, "OpenCL error (%s): %s\n", name, err_info);
+}
+
+bool OpenCLDeviceBase::opencl_version_check()
+{
+ string error;
+ if(!OpenCLInfo::platform_version_check(cpPlatform, &error)) {
+ opencl_error(error);
+ return false;
+ }
+ if(!OpenCLInfo::device_version_check(cdDevice, &error)) {
+ opencl_error(error);
+ return false;
+ }
+ return true;
+}
+
+string OpenCLDeviceBase::device_md5_hash(string kernel_custom_build_options)
+{
+ MD5Hash md5;
+ char version[256], driver[256], name[256], vendor[256];
+
+ clGetPlatformInfo(cpPlatform, CL_PLATFORM_VENDOR, sizeof(vendor), &vendor, NULL);
+ clGetDeviceInfo(cdDevice, CL_DEVICE_VERSION, sizeof(version), &version, NULL);
+ clGetDeviceInfo(cdDevice, CL_DEVICE_NAME, sizeof(name), &name, NULL);
+ clGetDeviceInfo(cdDevice, CL_DRIVER_VERSION, sizeof(driver), &driver, NULL);
+
+ md5.append((uint8_t*)vendor, strlen(vendor));
+ md5.append((uint8_t*)version, strlen(version));
+ md5.append((uint8_t*)name, strlen(name));
+ md5.append((uint8_t*)driver, strlen(driver));
+
+ string options = kernel_build_options();
+ options += kernel_custom_build_options;
+ md5.append((uint8_t*)options.c_str(), options.size());
+
+ return md5.get_hex();
+}
+
+bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_features)
+{
+ /* Verify the device was initialized. */
+ if(!device_initialized) {
+ fprintf(stderr, "OpenCL: failed to initialize device.\n");
+ return false;
+ }
+
+ /* Verify we have the right OpenCL version. */
+ if(!opencl_version_check())
+ return false;
+
+ base_program = OpenCLProgram(this, "base", "kernel.cl", build_options_for_base_program(requested_features));
+ base_program.add_kernel(ustring("convert_to_byte"));
+ base_program.add_kernel(ustring("convert_to_half_float"));
+ base_program.add_kernel(ustring("shader"));
+ base_program.add_kernel(ustring("bake"));
+
+ vector<OpenCLProgram*> programs;
+ programs.push_back(&base_program);
+ /* Call actual class to fill the vector with its programs. */
+ load_kernels(requested_features, programs);
+
+ /* Parallel compilation is supported by Cycles, but currently all OpenCL frameworks
+ * serialize the calls internally, so it's not much use right now.
+ * Note: When enabling parallel compilation, use_stdout in the OpenCLProgram constructor
+ * should be set to false as well. */
+#if 0
+ TaskPool task_pool;
+ foreach(OpenCLProgram *program, programs) {
+ task_pool.push(function_bind(&OpenCLProgram::load, program));
+ }
+ task_pool.wait_work();
+
+ foreach(OpenCLProgram *program, programs) {
+ VLOG(2) << program->get_log();
+ if(!program->is_loaded()) {
+ program->report_error();
+ return false;
+ }
+ }
+#else
+ foreach(OpenCLProgram *program, programs) {
+ program->load();
+ if(!program->is_loaded()) {
+ return false;
+ }
+ }
+#endif
+
+ return true;
+}
+
+void OpenCLDeviceBase::mem_alloc(device_memory& mem, MemoryType type)
+{
+ size_t size = mem.memory_size();
+
+ cl_mem_flags mem_flag;
+ void *mem_ptr = NULL;
+
+ if(type == MEM_READ_ONLY)
+ mem_flag = CL_MEM_READ_ONLY;
+ else if(type == MEM_WRITE_ONLY)
+ mem_flag = CL_MEM_WRITE_ONLY;
+ else
+ mem_flag = CL_MEM_READ_WRITE;
+
+ /* Zero-size allocation might be invoked by render, but not really
+ * supported by OpenCL. Using NULL as device pointer also doesn't really
+ * work for some reason, so for the time being we'll use the special
+ * case null_mem buffer.
+ */
+ if(size != 0) {
+ mem.device_pointer = (device_ptr)clCreateBuffer(cxContext,
+ mem_flag,
+ size,
+ mem_ptr,
+ &ciErr);
+ opencl_assert_err(ciErr, "clCreateBuffer");
+ }
+ else {
+ mem.device_pointer = null_mem;
+ }
+
+ stats.mem_alloc(size);
+ mem.device_size = size;
+}
+
+void OpenCLDeviceBase::mem_copy_to(device_memory& mem)
+{
+ /* this is blocking */
+ size_t size = mem.memory_size();
+ if(size != 0) {
+ opencl_assert(clEnqueueWriteBuffer(cqCommandQueue,
+ CL_MEM_PTR(mem.device_pointer),
+ CL_TRUE,
+ 0,
+ size,
+ (void*)mem.data_pointer,
+ 0,
+ NULL, NULL));
+ }
+}
+
+void OpenCLDeviceBase::mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
+{
+ size_t offset = elem*y*w;
+ size_t size = elem*w*h;
+ assert(size != 0);
+ opencl_assert(clEnqueueReadBuffer(cqCommandQueue,
+ CL_MEM_PTR(mem.device_pointer),
+ CL_TRUE,
+ offset,
+ size,
+ (uchar*)mem.data_pointer + offset,
+ 0,
+ NULL, NULL));
+}
+
+void OpenCLDeviceBase::mem_zero(device_memory& mem)
+{
+ if(mem.device_pointer) {
+ memset((void*)mem.data_pointer, 0, mem.memory_size());
+ mem_copy_to(mem);
+ }
+}
+
+void OpenCLDeviceBase::mem_free(device_memory& mem)
+{
+ if(mem.device_pointer) {
+ if(mem.device_pointer != null_mem) {
+ opencl_assert(clReleaseMemObject(CL_MEM_PTR(mem.device_pointer)));
+ }
+ mem.device_pointer = 0;
+
+ stats.mem_free(mem.device_size);
+ mem.device_size = 0;
+ }
+}
+
+void OpenCLDeviceBase::const_copy_to(const char *name, void *host, size_t size)
+{
+ ConstMemMap::iterator i = const_mem_map.find(name);
+
+ if(i == const_mem_map.end()) {
+ device_vector<uchar> *data = new device_vector<uchar>();
+ data->copy((uchar*)host, size);
+
+ mem_alloc(*data, MEM_READ_ONLY);
+ i = const_mem_map.insert(ConstMemMap::value_type(name, data)).first;
+ }
+ else {
+ device_vector<uchar> *data = i->second;
+ data->copy((uchar*)host, size);
+ }
+
+ mem_copy_to(*i->second);
+}
+
+void OpenCLDeviceBase::tex_alloc(const char *name,
+ device_memory& mem,
+ InterpolationType /*interpolation*/,
+ ExtensionType /*extension*/)
+{
+ VLOG(1) << "Texture allocate: " << name << ", "
+ << string_human_readable_number(mem.memory_size()) << " bytes. ("
+ << string_human_readable_size(mem.memory_size()) << ")";
+ mem_alloc(mem, MEM_READ_ONLY);
+ mem_copy_to(mem);
+ assert(mem_map.find(name) == mem_map.end());
+ mem_map.insert(MemMap::value_type(name, mem.device_pointer));
+}
+
+void OpenCLDeviceBase::tex_free(device_memory& mem)
+{
+ if(mem.device_pointer) {
+ foreach(const MemMap::value_type& value, mem_map) {
+ if(value.second == mem.device_pointer) {
+ mem_map.erase(value.first);
+ break;
+ }
+ }
+
+ mem_free(mem);
+ }
+}
+
+size_t OpenCLDeviceBase::global_size_round_up(int group_size, int global_size)
+{
+ int r = global_size % group_size;
+ return global_size + ((r == 0)? 0: group_size - r);
+}
+
+void OpenCLDeviceBase::enqueue_kernel(cl_kernel kernel, size_t w, size_t h)
+{
+ size_t workgroup_size, max_work_items[3];
+
+ clGetKernelWorkGroupInfo(kernel, cdDevice,
+ CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &workgroup_size, NULL);
+ clGetDeviceInfo(cdDevice,
+ CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t)*3, max_work_items, NULL);
+
+ /* Try to divide evenly over 2 dimensions. */
+ size_t sqrt_workgroup_size = max((size_t)sqrt((double)workgroup_size), 1);
+ size_t local_size[2] = {sqrt_workgroup_size, sqrt_workgroup_size};
+
+ /* Some implementations have max size 1 on 2nd dimension. */
+ if(local_size[1] > max_work_items[1]) {
+ local_size[0] = workgroup_size/max_work_items[1];
+ local_size[1] = max_work_items[1];
+ }
+
+ size_t global_size[2] = {global_size_round_up(local_size[0], w),
+ global_size_round_up(local_size[1], h)};
+
+ /* A vertical size of 1 comes from the bake/shade kernels, where we should
+ * not round anything up, because otherwise we'll either be doing too
+ * much work per pixel (if we don't check the global ID on the Y axis) or
+ * be checking that the global ID always has Y of 0.
+ */
+ if(h == 1) {
+ global_size[1] = 1;
+ }
+
+ /* run kernel */
+ opencl_assert(clEnqueueNDRangeKernel(cqCommandQueue, kernel, 2, NULL, global_size, NULL, 0, NULL, NULL));
+ opencl_assert(clFlush(cqCommandQueue));
+}
+
+void OpenCLDeviceBase::set_kernel_arg_mem(cl_kernel kernel, cl_uint *narg, const char *name)
+{
+ cl_mem ptr;
+
+ MemMap::iterator i = mem_map.find(name);
+ if(i != mem_map.end()) {
+ ptr = CL_MEM_PTR(i->second);
+ }
+ else {
+ /* work around NULL not working, even though the spec says otherwise */
+ ptr = CL_MEM_PTR(null_mem);
+ }
+
+ opencl_assert(clSetKernelArg(kernel, (*narg)++, sizeof(ptr), (void*)&ptr));
+}
+
+void OpenCLDeviceBase::film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half)
+{
+ /* cast arguments to cl types */
+ cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
+ cl_mem d_rgba = (rgba_byte)? CL_MEM_PTR(rgba_byte): CL_MEM_PTR(rgba_half);
+ cl_mem d_buffer = CL_MEM_PTR(buffer);
+ cl_int d_x = task.x;
+ cl_int d_y = task.y;
+ cl_int d_w = task.w;
+ cl_int d_h = task.h;
+ cl_float d_sample_scale = 1.0f/(task.sample + 1);
+ cl_int d_offset = task.offset;
+ cl_int d_stride = task.stride;
+
+
+ cl_kernel ckFilmConvertKernel = (rgba_byte)? base_program(ustring("convert_to_byte")): base_program(ustring("convert_to_half_float"));
+
+ cl_uint start_arg_index =
+ kernel_set_args(ckFilmConvertKernel,
+ 0,
+ d_data,
+ d_rgba,
+ d_buffer);
+
+#define KERNEL_TEX(type, ttype, name) \
+set_kernel_arg_mem(ckFilmConvertKernel, &start_arg_index, #name);
+#include "kernel_textures.h"
+#undef KERNEL_TEX
+
+ start_arg_index += kernel_set_args(ckFilmConvertKernel,
+ start_arg_index,
+ d_sample_scale,
+ d_x,
+ d_y,
+ d_w,
+ d_h,
+ d_offset,
+ d_stride);
+
+ enqueue_kernel(ckFilmConvertKernel, d_w, d_h);
+}
+
+void OpenCLDeviceBase::shader(DeviceTask& task)
+{
+ /* cast arguments to cl types */
+ cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
+ cl_mem d_input = CL_MEM_PTR(task.shader_input);
+ cl_mem d_output = CL_MEM_PTR(task.shader_output);
+ cl_mem d_output_luma = CL_MEM_PTR(task.shader_output_luma);
+ cl_int d_shader_eval_type = task.shader_eval_type;
+ cl_int d_shader_filter = task.shader_filter;
+ cl_int d_shader_x = task.shader_x;
+ cl_int d_shader_w = task.shader_w;
+ cl_int d_offset = task.offset;
+
+ cl_kernel kernel;
+
+ if(task.shader_eval_type >= SHADER_EVAL_BAKE)
+ kernel = base_program(ustring("bake"));
+ else
+ kernel = base_program(ustring("shader"));
+
+ cl_uint start_arg_index =
+ kernel_set_args(kernel,
+ 0,
+ d_data,
+ d_input,
+ d_output);
+
+ if(task.shader_eval_type < SHADER_EVAL_BAKE) {
+ start_arg_index += kernel_set_args(kernel,
+ start_arg_index,
+ d_output_luma);
+ }
+
+#define KERNEL_TEX(type, ttype, name) \
+ set_kernel_arg_mem(kernel, &start_arg_index, #name);
+#include "kernel_textures.h"
+#undef KERNEL_TEX
+
+ start_arg_index += kernel_set_args(kernel,
+ start_arg_index,
+ d_shader_eval_type);
+ if(task.shader_eval_type >= SHADER_EVAL_BAKE) {
+ start_arg_index += kernel_set_args(kernel,
+ start_arg_index,
+ d_shader_filter);
+ }
+ start_arg_index += kernel_set_args(kernel,
+ start_arg_index,
+ d_shader_x,
+ d_shader_w,
+ d_offset);
+
+ for(int sample = 0; sample < task.num_samples; sample++) {
+
+ if(task.get_cancel())
+ break;
+
+ kernel_set_args(kernel, start_arg_index, sample);
+
+ enqueue_kernel(kernel, task.shader_w, 1);
+
+ clFinish(cqCommandQueue);
+
+ task.update_progress(NULL);
+ }
+}
+
+string OpenCLDeviceBase::kernel_build_options(const string *debug_src)
+{
+ string build_options = "-cl-fast-relaxed-math ";
+
+ if(platform_name == "NVIDIA CUDA") {
+ build_options += "-D__KERNEL_OPENCL_NVIDIA__ "
+ "-cl-nv-maxrregcount=32 "
+ "-cl-nv-verbose ";
+
+ uint compute_capability_major, compute_capability_minor;
+ clGetDeviceInfo(cdDevice, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV,
+ sizeof(cl_uint), &compute_capability_major, NULL);
+ clGetDeviceInfo(cdDevice, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV,
+ sizeof(cl_uint), &compute_capability_minor, NULL);
+
+ build_options += string_printf("-D__COMPUTE_CAPABILITY__=%u ",
+ compute_capability_major * 100 +
+ compute_capability_minor * 10);
+ }
+
+ else if(platform_name == "Apple")
+ build_options += "-D__KERNEL_OPENCL_APPLE__ ";
+
+ else if(platform_name == "AMD Accelerated Parallel Processing")
+ build_options += "-D__KERNEL_OPENCL_AMD__ ";
+
+ else if(platform_name == "Intel(R) OpenCL") {
+ build_options += "-D__KERNEL_OPENCL_INTEL_CPU__ ";
+
+ /* Options for gdb source level kernel debugging.
+ * this segfaults on linux currently.
+ */
+ if(OpenCLInfo::use_debug() && debug_src)
+ build_options += "-g -s \"" + *debug_src + "\" ";
+ }
+
+ if(OpenCLInfo::use_debug())
+ build_options += "-D__KERNEL_OPENCL_DEBUG__ ";
+
+#ifdef WITH_CYCLES_DEBUG
+ build_options += "-D__KERNEL_DEBUG__ ";
+#endif
+
+ return build_options;
+}
+
+/* TODO(sergey): In the future we can use variadic templates, once
+ * C++0x is allowed. That should allow cleaning this up a bit.
+ */
+int OpenCLDeviceBase::kernel_set_args(cl_kernel kernel,
+ int start_argument_index,
+ const ArgumentWrapper& arg1,
+ const ArgumentWrapper& arg2,
+ const ArgumentWrapper& arg3,
+ const ArgumentWrapper& arg4,
+ const ArgumentWrapper& arg5,
+ const ArgumentWrapper& arg6,
+ const ArgumentWrapper& arg7,
+ const ArgumentWrapper& arg8,
+ const ArgumentWrapper& arg9,
+ const ArgumentWrapper& arg10,
+ const ArgumentWrapper& arg11,
+ const ArgumentWrapper& arg12,
+ const ArgumentWrapper& arg13,
+ const ArgumentWrapper& arg14,
+ const ArgumentWrapper& arg15,
+ const ArgumentWrapper& arg16,
+ const ArgumentWrapper& arg17,
+ const ArgumentWrapper& arg18,
+ const ArgumentWrapper& arg19,
+ const ArgumentWrapper& arg20,
+ const ArgumentWrapper& arg21,
+ const ArgumentWrapper& arg22,
+ const ArgumentWrapper& arg23,
+ const ArgumentWrapper& arg24,
+ const ArgumentWrapper& arg25,
+ const ArgumentWrapper& arg26,
+ const ArgumentWrapper& arg27,
+ const ArgumentWrapper& arg28,
+ const ArgumentWrapper& arg29,
+ const ArgumentWrapper& arg30,
+ const ArgumentWrapper& arg31,
+ const ArgumentWrapper& arg32,
+ const ArgumentWrapper& arg33)
+{
+ int current_arg_index = 0;
+#define FAKE_VARARG_HANDLE_ARG(arg) \
+ do { \
+ if(arg.pointer != NULL) { \
+ opencl_assert(clSetKernelArg( \
+ kernel, \
+ start_argument_index + current_arg_index, \
+ arg.size, arg.pointer)); \
+ ++current_arg_index; \
+ } \
+ else { \
+ return current_arg_index; \
+ } \
+ } while(false)
+ FAKE_VARARG_HANDLE_ARG(arg1);
+ FAKE_VARARG_HANDLE_ARG(arg2);
+ FAKE_VARARG_HANDLE_ARG(arg3);
+ FAKE_VARARG_HANDLE_ARG(arg4);
+ FAKE_VARARG_HANDLE_ARG(arg5);
+ FAKE_VARARG_HANDLE_ARG(arg6);
+ FAKE_VARARG_HANDLE_ARG(arg7);
+ FAKE_VARARG_HANDLE_ARG(arg8);
+ FAKE_VARARG_HANDLE_ARG(arg9);
+ FAKE_VARARG_HANDLE_ARG(arg10);
+ FAKE_VARARG_HANDLE_ARG(arg11);
+ FAKE_VARARG_HANDLE_ARG(arg12);
+ FAKE_VARARG_HANDLE_ARG(arg13);
+ FAKE_VARARG_HANDLE_ARG(arg14);
+ FAKE_VARARG_HANDLE_ARG(arg15);
+ FAKE_VARARG_HANDLE_ARG(arg16);
+ FAKE_VARARG_HANDLE_ARG(arg17);
+ FAKE_VARARG_HANDLE_ARG(arg18);
+ FAKE_VARARG_HANDLE_ARG(arg19);
+ FAKE_VARARG_HANDLE_ARG(arg20);
+ FAKE_VARARG_HANDLE_ARG(arg21);
+ FAKE_VARARG_HANDLE_ARG(arg22);
+ FAKE_VARARG_HANDLE_ARG(arg23);
+ FAKE_VARARG_HANDLE_ARG(arg24);
+ FAKE_VARARG_HANDLE_ARG(arg25);
+ FAKE_VARARG_HANDLE_ARG(arg26);
+ FAKE_VARARG_HANDLE_ARG(arg27);
+ FAKE_VARARG_HANDLE_ARG(arg28);
+ FAKE_VARARG_HANDLE_ARG(arg29);
+ FAKE_VARARG_HANDLE_ARG(arg30);
+ FAKE_VARARG_HANDLE_ARG(arg31);
+ FAKE_VARARG_HANDLE_ARG(arg32);
+ FAKE_VARARG_HANDLE_ARG(arg33);
+#undef FAKE_VARARG_HANDLE_ARG
+ return current_arg_index;
+}
+
+void OpenCLDeviceBase::release_kernel_safe(cl_kernel kernel)
+{
+ if(kernel) {
+ clReleaseKernel(kernel);
+ }
+}
+
+void OpenCLDeviceBase::release_mem_object_safe(cl_mem mem)
+{
+ if(mem != NULL) {
+ clReleaseMemObject(mem);
+ }
+}
+
+void OpenCLDeviceBase::release_program_safe(cl_program program)
+{
+ if(program) {
+ clReleaseProgram(program);
+ }
+}
+
+/* ** These are for working around some compiler-specific bugs ** */
+
+cl_program OpenCLDeviceBase::load_cached_kernel(
+ ustring key,
+ thread_scoped_lock& cache_locker)
+{
+ return OpenCLCache::get_program(cpPlatform,
+ cdDevice,
+ key,
+ cache_locker);
+}
+
+void OpenCLDeviceBase::store_cached_kernel(
+ cl_program program,
+ ustring key,
+ thread_scoped_lock& cache_locker)
+{
+ OpenCLCache::store_program(cpPlatform,
+ cdDevice,
+ program,
+ key,
+ cache_locker);
+}
+
+string OpenCLDeviceBase::build_options_for_base_program(
+ const DeviceRequestedFeatures& /*requested_features*/)
+{
+ /* TODO(sergey): By default we compile all features, meaning
+ * mega kernel is not getting feature-based optimizations.
+ *
+ * Ideally we should always compile the kernel with as few features
+ * enabled as possible to keep performance at its max.
+ */
+ return "";
+}
+
+CCL_NAMESPACE_END
+
+#endif
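
The work-group sizing in enqueue_kernel() above splits the device's work-group size evenly over two dimensions and rounds the global size up to a multiple of the local size, which is why the kernels have to bounds-check their global ID. The following standalone sketch reproduces the same arithmetic with assumed example values (a work-group size of 256 and a 1920x1080 image, chosen for illustration only; they are not taken from the patch):

#include <cmath>
#include <cstdio>

/* Same arithmetic as OpenCLDeviceBase::global_size_round_up(). */
static size_t round_up(size_t group_size, size_t global_size)
{
	size_t r = global_size % group_size;
	return global_size + ((r == 0) ? 0 : group_size - r);
}

int main()
{
	size_t workgroup_size = 256;           /* Example CL_KERNEL_WORK_GROUP_SIZE. */
	size_t w = 1920, h = 1080;             /* Example image size. */

	size_t local = (size_t)sqrt((double)workgroup_size);  /* 16 */
	size_t global_x = round_up(local, w);  /* 1920, already a multiple of 16. */
	size_t global_y = round_up(local, h);  /* 1088, rounded up from 1080. */

	/* The extra rows are why the kernels bounds-check their global ID. */
	printf("local %zux%zu, global %zux%zu\n", local, local, global_x, global_y);
	return 0;
}
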
diff --git a/intern/cycles/device/opencl/opencl_mega.cpp b/intern/cycles/device/opencl/opencl_mega.cpp
new file mode 100644
index 0000000..6ea7619
--- /dev/null
+++ b/intern/cycles/device/opencl/opencl_mega.cpp
@@ -0,0 +1,154 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef WITH_OPENCL
+
+#include "opencl.h"
+
+#include "buffers.h"
+
+#include "kernel_types.h"
+
+#include "util_md5.h"
+#include "util_path.h"
+#include "util_time.h"
+
+CCL_NAMESPACE_BEGIN
+
+class OpenCLDeviceMegaKernel : public OpenCLDeviceBase
+{
+public:
+ OpenCLProgram path_trace_program;
+
+ OpenCLDeviceMegaKernel(DeviceInfo& info, Stats &stats, bool background_)
+ : OpenCLDeviceBase(info, stats, background_),
+ path_trace_program(this, "megakernel", "kernel.cl", "-D__COMPILE_ONLY_MEGAKERNEL__ ")
+ {
+ }
+
+ virtual bool show_samples() const {
+ return true;
+ }
+
+ virtual void load_kernels(const DeviceRequestedFeatures& /*requested_features*/,
+ vector<OpenCLProgram*> &programs)
+ {
+ path_trace_program.add_kernel(ustring("path_trace"));
+ programs.push_back(&path_trace_program);
+ }
+
+ ~OpenCLDeviceMegaKernel()
+ {
+ task_pool.stop();
+ path_trace_program.release();
+ }
+
+ void path_trace(RenderTile& rtile, int sample)
+ {
+ /* Cast arguments to cl types. */
+ cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
+ cl_mem d_buffer = CL_MEM_PTR(rtile.buffer);
+ cl_mem d_rng_state = CL_MEM_PTR(rtile.rng_state);
+ cl_int d_x = rtile.x;
+ cl_int d_y = rtile.y;
+ cl_int d_w = rtile.w;
+ cl_int d_h = rtile.h;
+ cl_int d_offset = rtile.offset;
+ cl_int d_stride = rtile.stride;
+
+ /* Sample arguments. */
+ cl_int d_sample = sample;
+
+ cl_kernel ckPathTraceKernel = path_trace_program(ustring("path_trace"));
+
+ cl_uint start_arg_index =
+ kernel_set_args(ckPathTraceKernel,
+ 0,
+ d_data,
+ d_buffer,
+ d_rng_state);
+
+#define KERNEL_TEX(type, ttype, name) \
+ set_kernel_arg_mem(ckPathTraceKernel, &start_arg_index, #name);
+#include "kernel_textures.h"
+#undef KERNEL_TEX
+
+ start_arg_index += kernel_set_args(ckPathTraceKernel,
+ start_arg_index,
+ d_sample,
+ d_x,
+ d_y,
+ d_w,
+ d_h,
+ d_offset,
+ d_stride);
+
+ enqueue_kernel(ckPathTraceKernel, d_w, d_h);
+ }
+
+ void thread_run(DeviceTask *task)
+ {
+ if(task->type == DeviceTask::FILM_CONVERT) {
+ film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half);
+ }
+ else if(task->type == DeviceTask::SHADER) {
+ shader(*task);
+ }
+ else if(task->type == DeviceTask::PATH_TRACE) {
+ RenderTile tile;
+ /* Keep rendering tiles until done. */
+ while(task->acquire_tile(this, tile)) {
+ int start_sample = tile.start_sample;
+ int end_sample = tile.start_sample + tile.num_samples;
+
+ for(int sample = start_sample; sample < end_sample; sample++) {
+ if(task->get_cancel()) {
+ if(task->need_finish_queue == false)
+ break;
+ }
+
+ path_trace(tile, sample);
+
+ tile.sample = sample + 1;
+
+ task->update_progress(&tile, tile.w*tile.h);
+ }
+
+ /* Complete kernel execution before releasing the tile. */
+ /* This helps in multi-device render:
+ * the device that reaches the critical-section function
+ * release_tile waits (stalling other devices from entering
+ * release_tile) for all kernels to complete. If device1 (a
+ * slow-render device) reaches release_tile first, it would
+ * stall device2 (a fast-render device) from proceeding to render
+ * the next tile.
+ */
+ clFinish(cqCommandQueue);
+
+ task->release_tile(tile);
+ }
+ }
+ }
+};
+
+Device *opencl_create_mega_device(DeviceInfo& info, Stats& stats, bool background)
+{
+ return new OpenCLDeviceMegaKernel(info, stats, background);
+}
+
+CCL_NAMESPACE_END
+
+#endif
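
The kernel_set_args() calls above rely on the ArgumentWrapper scheme declared in opencl.h: each unspecified parameter is a default-constructed wrapper carrying a NULL pointer, argument setting stops at the first such wrapper, and the returned count is what gets added to start_arg_index. Here is a stripped-down, self-contained sketch of that fake-varargs mechanism; ArgWrapper and set_args are illustrative names, not part of this patch.

#include <cstddef>
#include <cstdio>

/* Stripped-down ArgumentWrapper: a default-constructed wrapper carries a
 * NULL pointer, which is how the fake-varargs call below knows where the
 * real arguments stop. int/float values are copied into the wrapper so
 * that 'pointer' stays valid while the argument is consumed. */
class ArgWrapper {
public:
	ArgWrapper() : size(0), pointer(NULL) {}
	ArgWrapper(int argument) : size(sizeof(int)), int_value(argument), pointer(&int_value) {}
	ArgWrapper(float argument) : size(sizeof(float)), float_value(argument), pointer(&float_value) {}
	size_t size;
	int int_value;
	float float_value;
	void *pointer;
};

/* Analogue of kernel_set_args(): consumes arguments until the first
 * default-constructed wrapper and returns how many were set, so the
 * caller can keep accumulating a start index across several calls. */
int set_args(const ArgWrapper& arg1 = ArgWrapper(),
             const ArgWrapper& arg2 = ArgWrapper(),
             const ArgWrapper& arg3 = ArgWrapper(),
             const ArgWrapper& arg4 = ArgWrapper())
{
	const ArgWrapper *args[] = {&arg1, &arg2, &arg3, &arg4};
	int count = 0;
	for(int i = 0; i < 4; i++) {
		if(args[i]->pointer == NULL) {
			break;  /* First unset argument terminates the list. */
		}
		printf("arg %d: %d bytes\n", count, (int)args[i]->size);
		count++;
	}
	return count;
}

int main()
{
	/* Only the two explicit arguments are consumed. */
	return set_args(7, 2.5f) == 2 ? 0 : 1;
}

Copying int and float arguments by value into the wrapper is what keeps the stored pointer valid until the argument is consumed, the same reason ArgumentWrapper in the patch holds int_value and float_value members.
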
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp
new file mode 100644
index 0000000..3c3c215
--- /dev/null
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -0,0 +1,1311 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef WITH_OPENCL
+
+#include "opencl.h"
+
+#include "buffers.h"
+
+#include "kernel_types.h"
+
+#include "util_md5.h"
+#include "util_path.h"
+#include "util_time.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* TODO(sergey): This is to keep tile split on OpenCL level working
+ * for now, since without this view-port render does not work as it
+ * should.
+ *
+ * Ideally it'll be done on the higher level, but we need to get ready
+ * for merge rather soon, so let's keep split logic private here in
+ * the file.
+ */
+class SplitRenderTile : public RenderTile {
+public:
+ SplitRenderTile()
+ : RenderTile(),
+ buffer_offset_x(0),
+ buffer_offset_y(0),
+ rng_state_offset_x(0),
+ rng_state_offset_y(0),
+ buffer_rng_state_stride(0) {}
+
+ explicit SplitRenderTile(RenderTile& tile)
+ : RenderTile(),
+ buffer_offset_x(0),
+ buffer_offset_y(0),
+ rng_state_offset_x(0),
+ rng_state_offset_y(0),
+ buffer_rng_state_stride(0)
+ {
+ x = tile.x;
+ y = tile.y;
+ w = tile.w;
+ h = tile.h;
+ start_sample = tile.start_sample;
+ num_samples = tile.num_samples;
+ sample = tile.sample;
+ resolution = tile.resolution;
+ offset = tile.offset;
+ stride = tile.stride;
+ buffer = tile.buffer;
+ rng_state = tile.rng_state;
+ buffers = tile.buffers;
+ }
+
+ /* The split kernel is constrained by device global memory;
+ * hence it cannot render big tile sizes in one go. If the user
+ * sets a big tile size (big is relative to the available device
+ * global memory), we split the tile further and then call
+ * path_trace on each of those split tiles. The following
+ * variables assist in achieving that purpose.
+ */
+ int buffer_offset_x;
+ int buffer_offset_y;
+ int rng_state_offset_x;
+ int rng_state_offset_y;
+ int buffer_rng_state_stride;
+};
+
+/* OpenCLDeviceSplitKernel's declaration/definition. */
+class OpenCLDeviceSplitKernel : public OpenCLDeviceBase
+{
+public:
+ /* Kernel declaration. */
+ OpenCLProgram program_data_init;
+ OpenCLProgram program_scene_intersect;
+ OpenCLProgram program_lamp_emission;
+ OpenCLProgram program_queue_enqueue;
+ OpenCLProgram program_background_buffer_update;
+ OpenCLProgram program_shader_eval;
+ OpenCLProgram program_holdout_emission_blurring_pathtermination_ao;
+ OpenCLProgram program_direct_lighting;
+ OpenCLProgram program_shadow_blocked;
+ OpenCLProgram program_next_iteration_setup;
+ OpenCLProgram program_sum_all_radiance;
+
+ /* Global memory variables [porting]; this memory is used for
+ * co-operation between different kernels; data written by one
+ * kernel will be available to another kernel via this global
+ * memory.
+ */
+ cl_mem rng_coop;
+ cl_mem throughput_coop;
+ cl_mem L_transparent_coop;
+ cl_mem PathRadiance_coop;
+ cl_mem Ray_coop;
+ cl_mem PathState_coop;
+ cl_mem Intersection_coop;
+ cl_mem kgbuffer; /* KernelGlobals buffer. */
+
+ /* Global buffers for ShaderData. */
+ cl_mem sd; /* ShaderData used in the main path-iteration loop. */
+ cl_mem sd_DL_shadow; /* ShaderData used in Direct Lighting and
+ * shadow_blocked kernel.
+ */
+
+ /* Global memory required for shadow blocked and accum_radiance. */
+ cl_mem BSDFEval_coop;
+ cl_mem ISLamp_coop;
+ cl_mem LightRay_coop;
+ cl_mem AOAlpha_coop;
+ cl_mem AOBSDF_coop;
+ cl_mem AOLightRay_coop;
+ cl_mem Intersection_coop_shadow;
+
+#ifdef WITH_CYCLES_DEBUG
+ /* DebugData memory */
+ cl_mem debugdata_coop;
+#endif
+
+ /* Global state array that tracks ray state. */
+ cl_mem ray_state;
+
+ /* Per sample buffers. */
+ cl_mem per_sample_output_buffers;
+
+ /* Denotes which sample each ray is being processed for. */
+ cl_mem work_array;
+
+ /* Queue */
+ cl_mem Queue_data; /* Array of size queuesize * num_queues * sizeof(int). */
+ cl_mem Queue_index; /* Array of size num_queues * sizeof(int);
+ * Tracks the size of each queue.
+ */
+
+ /* Flag to make the scene_intersect and lamp_emission kernels use queues. */
+ cl_mem use_queues_flag;
+
+ /* Amount of memory in output buffer associated with one pixel/thread. */
+ size_t per_thread_output_buffer_size;
+
+ /* Total allocatable available device memory. */
+ size_t total_allocatable_memory;
+
+ /* Host version of ray_state; used for checking host-side path-iteration
+ * termination.
+ */
+ char *hostRayStateArray;
+
+ /* Number of path-iterations to be done in one shot. */
+ unsigned int PathIteration_times;
+
+#ifdef __WORK_STEALING__
+ /* Work pool with respect to each work group. */
+ cl_mem work_pool_wgs;
+
+ /* Denotes the maximum work groups possible w.r.t. current tile size. */
+ unsigned int max_work_groups;
+#endif
+
+ /* clos_max value for which the kernels have been loaded currently. */
+ int current_max_closure;
+
+ /* Marked true in the constructor and marked false at the end of path_trace(). */
+ bool first_tile;
+
+ OpenCLDeviceSplitKernel(DeviceInfo& info, Stats &stats, bool background_)
+ : OpenCLDeviceBase(info, stats, background_)
+ {
+ background = background_;
+
+ /* Initialize cl_mem variables. */
+ kgbuffer = NULL;
+ sd = NULL;
+ sd_DL_shadow = NULL;
+
+ rng_coop = NULL;
+ throughput_coop = NULL;
+ L_transparent_coop = NULL;
+ PathRadiance_coop = NULL;
+ Ray_coop = NULL;
+ PathState_coop = NULL;
+ Intersection_coop = NULL;
+ ray_state = NULL;
+
+ AOAlpha_coop = NULL;
+ AOBSDF_coop = NULL;
+ AOLightRay_coop = NULL;
+ BSDFEval_coop = NULL;
+ ISLamp_coop = NULL;
+ LightRay_coop = NULL;
+ Intersection_coop_shadow = NULL;
+
+#ifdef WITH_CYCLES_DEBUG
+ debugdata_coop = NULL;
+#endif
+
+ work_array = NULL;
+
+ /* Queue. */
+ Queue_data = NULL;
+ Queue_index = NULL;
+ use_queues_flag = NULL;
+
+ per_sample_output_buffers = NULL;
+
+ per_thread_output_buffer_size = 0;
+ hostRayStateArray = NULL;
+ PathIteration_times = PATH_ITER_INC_FACTOR;
+#ifdef __WORK_STEALING__
+ work_pool_wgs = NULL;
+ max_work_groups = 0;
+#endif
+ current_max_closure = -1;
+ first_tile = true;
+
+ /* Get device's maximum memory that can be allocated. */
+ ciErr = clGetDeviceInfo(cdDevice,
+ CL_DEVICE_MAX_MEM_ALLOC_SIZE,
+ sizeof(size_t),
+ &total_allocatable_memory,
+ NULL);
+ assert(ciErr == CL_SUCCESS);
+ if(platform_name == "AMD Accelerated Parallel Processing") {
+ /* This value is tweakable; the AMD platform does not seem to
+ * give maximum performance when all of CL_DEVICE_MAX_MEM_ALLOC_SIZE
+ * is considered for further computation.
+ */
+ total_allocatable_memory /= 2;
+ }
+ }
+
+ virtual bool show_samples() const {
+ return false;
+ }
+
+ /* Split kernel utility functions. */
+ size_t get_tex_size(const char *tex_name)
+ {
+ cl_mem ptr;
+ size_t ret_size = 0;
+ MemMap::iterator i = mem_map.find(tex_name);
+ if(i != mem_map.end()) {
+ ptr = CL_MEM_PTR(i->second);
+ ciErr = clGetMemObjectInfo(ptr,
+ CL_MEM_SIZE,
+ sizeof(ret_size),
+ &ret_size,
+ NULL);
+ assert(ciErr == CL_SUCCESS);
+ }
+ return ret_size;
+ }
+
+ size_t get_shader_data_size(size_t max_closure)
+ {
+ /* ShaderData size with variable size ShaderClosure array */
+ return sizeof(ShaderData) - (sizeof(ShaderClosure) * (MAX_CLOSURE - max_closure));
+ }
+
+ /* Returns size of KernelGlobals structure associated with OpenCL. */
+ size_t get_KernelGlobals_size()
+ {
+ /* Copy dummy KernelGlobals related to OpenCL from kernel_globals.h to
+ * fetch its size.
+ */
+ typedef struct KernelGlobals {
+ ccl_constant KernelData *data;
+#define KERNEL_TEX(type, ttype, name) \
+ ccl_global type *name;
+#include "kernel_textures.h"
+#undef KERNEL_TEX
+ void *sd_input;
+ void *isect_shadow;
+ } KernelGlobals;
+
+ return sizeof(KernelGlobals);
+ }
+
+ virtual void load_kernels(const DeviceRequestedFeatures& requested_features,
+ vector<OpenCLProgram*> &programs)
+ {
+ string build_options = "-D__SPLIT_KERNEL__ ";
+#ifdef __WORK_STEALING__
+ build_options += "-D__WORK_STEALING__ ";
+#endif
+ build_options += requested_features.get_build_options();
+
+ /* Set compute device build option. */
+ cl_device_type device_type;
+ ciErr = clGetDeviceInfo(cdDevice,
+ CL_DEVICE_TYPE,
+ sizeof(cl_device_type),
+ &device_type,
+ NULL);
+ assert(ciErr == CL_SUCCESS);
+ if(device_type == CL_DEVICE_TYPE_GPU) {
+ build_options += " -D__COMPUTE_DEVICE_GPU__";
+ }
+
+#define GLUE(a, b) a ## b
+#define LOAD_KERNEL(name) \
+ do { \
+ GLUE(program_, name) = OpenCLProgram(this, "split_" #name, "kernel_" #name ".cl", build_options); \
+ GLUE(program_, name).add_kernel(ustring("path_trace_" #name)); \
+ programs.push_back(&GLUE(program_, name)); \
+ } while(false)
+
+ LOAD_KERNEL(data_init);
+ LOAD_KERNEL(scene_intersect);
+ LOAD_KERNEL(lamp_emission);
+ LOAD_KERNEL(queue_enqueue);
+ LOAD_KERNEL(background_buffer_update);
+ LOAD_KERNEL(shader_eval);
+ LOAD_KERNEL(holdout_emission_blurring_pathtermination_ao);
+ LOAD_KERNEL(direct_lighting);
+ LOAD_KERNEL(shadow_blocked);
+ LOAD_KERNEL(next_iteration_setup);
+ LOAD_KERNEL(sum_all_radiance);
+
+#undef LOAD_KERNEL
+#undef GLUE
+
+ current_max_closure = requested_features.max_closure;
+ }
+
+ ~OpenCLDeviceSplitKernel()
+ {
+ task_pool.stop();
+
+ /* Release kernels */
+ program_data_init.release();
+ program_scene_intersect.release();
+ program_lamp_emission.release();
+ program_queue_enqueue.release();
+ program_background_buffer_update.release();
+ program_shader_eval.release();
+ program_holdout_emission_blurring_pathtermination_ao.release();
+ program_direct_lighting.release();
+ program_shadow_blocked.release();
+ program_next_iteration_setup.release();
+ program_sum_all_radiance.release();
+
+ /* Release global memory */
+ release_mem_object_safe(rng_coop);
+ release_mem_object_safe(throughput_coop);
+ release_mem_object_safe(L_transparent_coop);
+ release_mem_object_safe(PathRadiance_coop);
+ release_mem_object_safe(Ray_coop);
+ release_mem_object_safe(PathState_coop);
+ release_mem_object_safe(Intersection_coop);
+ release_mem_object_safe(kgbuffer);
+ release_mem_object_safe(sd);
+ release_mem_object_safe(sd_DL_shadow);
+ release_mem_object_safe(ray_state);
+ release_mem_object_safe(AOAlpha_coop);
+ release_mem_object_safe(AOBSDF_coop);
+ release_mem_object_safe(AOLightRay_coop);
+ release_mem_object_safe(BSDFEval_coop);
+ release_mem_object_safe(ISLamp_coop);
+ release_mem_object_safe(LightRay_coop);
+ release_mem_object_safe(Intersection_coop_shadow);
+#ifdef WITH_CYCLES_DEBUG
+ release_mem_object_safe(debugdata_coop);
+#endif
+ release_mem_object_safe(use_queues_flag);
+ release_mem_object_safe(Queue_data);
+ release_mem_object_safe(Queue_index);
+ release_mem_object_safe(work_array);
+#ifdef __WORK_STEALING__
+ release_mem_object_safe(work_pool_wgs);
+#endif
+ release_mem_object_safe(per_sample_output_buffers);
+
+ if(hostRayStateArray != NULL) {
+ free(hostRayStateArray);
+ }
+ }
+
+ void path_trace(DeviceTask *task,
+ SplitRenderTile& rtile,
+ int2 max_render_feasible_tile_size)
+ {
+ /* cast arguments to cl types */
+ cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
+ cl_mem d_buffer = CL_MEM_PTR(rtile.buffer);
+ cl_mem d_rng_state = CL_MEM_PTR(rtile.rng_state);
+ cl_int d_x = rtile.x;
+ cl_int d_y = rtile.y;
+ cl_int d_w = rtile.w;
+ cl_int d_h = rtile.h;
+ cl_int d_offset = rtile.offset;
+ cl_int d_stride = rtile.stride;
+
+ /* Make sure that the configured render-feasible tile size is a multiple
+ * of the local work size dimensions.
+ */
+ assert(max_render_feasible_tile_size.x % SPLIT_KERNEL_LOCAL_SIZE_X == 0);
+ assert(max_render_feasible_tile_size.y % SPLIT_KERNEL_LOCAL_SIZE_Y == 0);
+
+ size_t global_size[2];
+ size_t local_size[2] = {SPLIT_KERNEL_LOCAL_SIZE_X,
+ SPLIT_KERNEL_LOCAL_SIZE_Y};
+
+ /* Set the range of samples to be processed for every ray in
+ * path-regeneration logic.
+ */
+ cl_int start_sample = rtile.start_sample;
+ cl_int end_sample = rtile.start_sample + rtile.num_samples;
+ cl_int num_samples = rtile.num_samples;
+
+#ifdef __WORK_STEALING__
+ global_size[0] = (((d_w - 1) / local_size[0]) + 1) * local_size[0];
+ global_size[1] = (((d_h - 1) / local_size[1]) + 1) * local_size[1];
+ unsigned int num_parallel_samples = 1;
+#else
+ global_size[1] = (((d_h - 1) / local_size[1]) + 1) * local_size[1];
+ unsigned int num_threads = max_render_feasible_tile_size.x *
+ max_render_feasible_tile_size.y;
+ unsigned int num_tile_columns_possible = num_threads / global_size[1];
+ /* Estimate the number of samples that can be
+ * processed in parallel.
+ */
+ unsigned int num_parallel_samples = min(num_tile_columns_possible / d_w,
+ rtile.num_samples);
+ /* Wavefront size in AMD is 64.
+ * TODO(sergey): What about other platforms?
+ */
+ if(num_parallel_samples >= 64) {
+ /* TODO(sergey): Could use generic round-up here. */
+ num_parallel_samples = (num_parallel_samples / 64) * 64;
+ }
+ assert(num_parallel_samples != 0);
+
+ global_size[0] = d_w * num_parallel_samples;
+#endif /* __WORK_STEALING__ */
+
+ assert(global_size[0] * global_size[1] <=
+ max_render_feasible_tile_size.x * max_render_feasible_tile_size.y);
+
+ /* Allocate all required global memory once. */
+ if(first_tile) {
+ size_t num_global_elements = max_render_feasible_tile_size.x *
+ max_render_feasible_tile_size.y;
+ /* TODO(sergey): This will actually over-allocate if
+ * particular kernel does not support multiclosure.
+ */
+ size_t shaderdata_size = get_shader_data_size(current_max_closure);
+
+#ifdef __WORK_STEALING__
+ /* Calculate max groups */
+ size_t max_global_size[2];
+ size_t tile_x = max_render_feasible_tile_size.x;
+ size_t tile_y = max_render_feasible_tile_size.y;
+ max_global_size[0] = (((tile_x - 1) / local_size[0]) + 1) * local_size[0];
+ max_global_size[1] = (((tile_y - 1) / local_size[1]) + 1) * local_size[1];
+ max_work_groups = (max_global_size[0] * max_global_size[1]) /
+ (local_size[0] * local_size[1]);
+ /* Allocate work_pool_wgs memory. */
+ work_pool_wgs = mem_alloc(max_work_groups * sizeof(unsigned int));
+#endif /* __WORK_STEALING__ */
+
+ /* Allocate queue_index memory only once. */
+ Queue_index = mem_alloc(NUM_QUEUES * sizeof(int));
+ use_queues_flag = mem_alloc(sizeof(char));
+ kgbuffer = mem_alloc(get_KernelGlobals_size());
+
+ /* Create global buffers for ShaderData. */
+ sd = mem_alloc(num_global_elements * shaderdata_size);
+ sd_DL_shadow = mem_alloc(num_global_elements * 2 * shaderdata_size);
+
+ /* Creation of global memory buffers which are shared among
+ * the kernels.
+ */
+ rng_coop = mem_alloc(num_global_elements * sizeof(RNG));
+ throughput_coop = mem_alloc(num_global_elements * sizeof(float3));
+ L_transparent_coop = mem_alloc(num_global_elements * sizeof(float));
+ PathRadiance_coop = mem_alloc(num_global_elements * sizeof(PathRadiance));
+ Ray_coop = mem_alloc(num_global_elements * sizeof(Ray));
+ PathState_coop = mem_alloc(num_global_elements * sizeof(PathState));
+ Intersection_coop = mem_alloc(num_global_elements * sizeof(Intersection));
+ AOAlpha_coop = mem_alloc(num_global_elements * sizeof(float3));
+ AOBSDF_coop = mem_alloc(num_global_elements * sizeof(float3));
+ AOLightRay_coop = mem_alloc(num_global_elements * sizeof(Ray));
+ BSDFEval_coop = mem_alloc(num_global_elements * sizeof(BsdfEval));
+ ISLamp_coop = mem_alloc(num_global_elements * sizeof(int));
+ LightRay_coop = mem_alloc(num_global_elements * sizeof(Ray));
+ Intersection_coop_shadow = mem_alloc(2 * num_global_elements * sizeof(Intersection));
+
+#ifdef WITH_CYCLES_DEBUG
+ debugdata_coop = mem_alloc(num_global_elements * sizeof(DebugData));
+#endif
+
+ ray_state = mem_alloc(num_global_elements * sizeof(char));
+
+ hostRayStateArray = (char *)calloc(num_global_elements, sizeof(char));
+ assert(hostRayStateArray != NULL && "Can't create hostRayStateArray memory");
+
+ Queue_data = mem_alloc(num_global_elements * (NUM_QUEUES * sizeof(int)+sizeof(int)));
+ work_array = mem_alloc(num_global_elements * sizeof(unsigned int));
+ per_sample_output_buffers = mem_alloc(num_global_elements *
+ per_thread_output_buffer_size);
+ }
+
+ cl_int dQueue_size = global_size[0] * global_size[1];
+
+ cl_uint start_arg_index =
+ kernel_set_args(program_data_init(),
+ 0,
+ kgbuffer,
+ sd_DL_shadow,
+ d_data,
+ per_sample_output_buffers,
+ d_rng_state,
+ rng_coop,
+ throughput_coop,
+ L_transparent_coop,
+ PathRadiance_coop,
+ Ray_coop,
+ PathState_coop,
+ Intersection_coop_shadow,
+ ray_state);
+
+/* TODO(sergey): Avoid map lookup here. */
+#define KERNEL_TEX(type, ttype, name) \
+ set_kernel_arg_mem(program_data_init(), &start_arg_index, #name);
+#include "kernel_textures.h"
+#undef KERNEL_TEX
+
+ start_arg_index +=
+ kernel_set_args(program_data_init(),
+ start_arg_index,
+ start_sample,
+ d_x,
+ d_y,
+ d_w,
+ d_h,
+ d_offset,
+ d_stride,
+ rtile.rng_state_offset_x,
+ rtile.rng_state_offset_y,
+ rtile.buffer_rng_state_stride,
+ Queue_data,
+ Queue_index,
+ dQueue_size,
+ use_queues_flag,
+ work_array,
+#ifdef __WORK_STEALING__
+ work_pool_wgs,
+ num_samples,
+#endif
+#ifdef WITH_CYCLES_DEBUG
+ debugdata_coop,
+#endif
+ num_parallel_samples);
+
+ kernel_set_args(program_scene_intersect(),
+ 0,
+ kgbuffer,
+ d_data,
+ rng_coop,
+ Ray_coop,
+ PathState_coop,
+ Intersection_coop,
+ ray_state,
+ d_w,
+ d_h,
+ Queue_data,
+ Queue_index,
+ dQueue_size,
+ use_queues_flag,
+#ifdef WITH_CYCLES_DEBUG
+ debugdata_coop,
+#endif
+ num_parallel_samples);
+
+ kernel_set_args(program_lamp_emission(),
+ 0,
+ kgbuffer,
+ d_data,
+ throughput_coop,
+ PathRadiance_coop,
+ Ray_coop,
+ PathState_coop,
+ Intersection_coop,
+ ray_state,
+ d_w,
+ d_h,
+ Queue_data,
+ Queue_index,
+ dQueue_size,
+ use_queues_flag,
+ num_parallel_samples);
+
+ kernel_set_args(program_queue_enqueue(),
+ 0,
+ Queue_data,
+ Queue_index,
+ ray_state,
+ dQueue_size);
+
+ kernel_set_args(program_background_buffer_update(),
+ 0,
+ kgbuffer,
+ d_data,
+ per_sample_output_buffers,
+ d_rng_state,
+ rng_coop,
+ throughput_coop,
+ PathRadiance_coop,
+ Ray_coop,
+ PathState_coop,
+ L_transparent_coop,
+ ray_state,
+ d_w,
+ d_h,
+ d_x,
+ d_y,
+ d_stride,
+ rtile.rng_state_offset_x,
+ rtile.rng_state_offset_y,
+ rtile.buffer_rng_state_stride,
+ work_array,
+ Queue_data,
+ Queue_index,
+ dQueue_size,
+ end_sample,
+ start_sample,
+#ifdef __WORK_STEALING__
+ work_pool_wgs,
+ num_samples,
+#endif
+#ifdef WITH_CYCLES_DEBUG
+ debugdata_coop,
+#endif
+ num_parallel_samples);
+
+ kernel_set_args(program_shader_eval(),
+ 0,
+ kgbuffer,
+ d_data,
+ sd,
+ rng_coop,
+ Ray_coop,
+ PathState_coop,
+ Intersection_coop,
+ ray_state,
+ Queue_data,
+ Queue_index,
+ dQueue_size);
+
+ kernel_set_args(program_holdout_emission_blurring_pathtermination_ao(),
+ 0,
+ kgbuffer,
+ d_data,
+ sd,
+ per_sample_output_buffers,
+ rng_coop,
+ throughput_coop,
+ L_transparent_coop,
+ PathRadiance_coop,
+ PathState_coop,
+ Intersection_coop,
+ AOAlpha_coop,
+ AOBSDF_coop,
+ AOLightRay_coop,
+ d_w,
+ d_h,
+ d_x,
+ d_y,
+ d_stride,
+ ray_state,
+ work_array,
+ Queue_data,
+ Queue_index,
+ dQueue_size,
+#ifdef __WORK_STEALING__
+ start_sample,
+#endif
+ num_parallel_samples);
+
+ kernel_set_args(program_direct_lighting(),
+ 0,
+ kgbuffer,
+ d_data,
+ sd,
+ rng_coop,
+ PathState_coop,
+ ISLamp_coop,
+ LightRay_coop,
+ BSDFEval_coop,
+ ray_state,
+ Queue_data,
+ Queue_index,
+ dQueue_size);
+
+ kernel_set_args(program_shadow_blocked(),
+ 0,
+ kgbuffer,
+ d_data,
+ PathState_coop,
+ LightRay_coop,
+ AOLightRay_coop,
+ ray_state,
+ Queue_data,
+ Queue_index,
+ dQueue_size);
+
+ kernel_set_args(program_next_iteration_setup(),
+ 0,
+ kgbuffer,
+ d_data,
+ sd,
+ rng_coop,
+ throughput_coop,
+ PathRadiance_coop,
+ Ray_coop,
+ PathState_coop,
+ LightRay_coop,
+ ISLamp_coop,
+ BSDFEval_coop,
+ AOLightRay_coop,
+ AOBSDF_coop,
+ AOAlpha_coop,
+ ray_state,
+ Queue_data,
+ Queue_index,
+ dQueue_size,
+ use_queues_flag);
+
+ kernel_set_args(program_sum_all_radiance(),
+ 0,
+ d_data,
+ d_buffer,
+ per_sample_output_buffers,
+ num_parallel_samples,
+ d_w,
+ d_h,
+ d_stride,
+ rtile.buffer_offset_x,
+ rtile.buffer_offset_y,
+ rtile.buffer_rng_state_stride,
+ start_sample);
+
+ /* Macro for Enqueuing split kernels. */
+#define GLUE(a, b) a ## b
+#define ENQUEUE_SPLIT_KERNEL(kernelName, globalSize, localSize) \
+ { \
+ ciErr = clEnqueueNDRangeKernel(cqCommandQueue, \
+ GLUE(program_, \
+ kernelName)(), \
+ 2, \
+ NULL, \
+ globalSize, \
+ localSize, \
+ 0, \
+ NULL, \
+ NULL); \
+ opencl_assert_err(ciErr, "clEnqueueNDRangeKernel"); \
+ if(ciErr != CL_SUCCESS) { \
+ string message = string_printf("OpenCL error: %s in clEnqueueNDRangeKernel()", \
+ clewErrorString(ciErr)); \
+ opencl_error(message); \
+ return; \
+ } \
+ } (void) 0
+
+ /* Enqueue ckPathTraceKernel_data_init kernel. */
+ ENQUEUE_SPLIT_KERNEL(data_init, global_size, local_size);
+ bool activeRaysAvailable = true;
+
+ /* Record the number of times host intervention has been made. */
+ unsigned int numHostIntervention = 0;
+ unsigned int numNextPathIterTimes = PathIteration_times;
+ bool canceled = false;
+ while(activeRaysAvailable) {
+ /* Twice the global work size of other kernels for
+ * ckPathTraceKernel_shadow_blocked_direct_lighting. */
+ size_t global_size_shadow_blocked[2];
+ global_size_shadow_blocked[0] = global_size[0] * 2;
+ global_size_shadow_blocked[1] = global_size[1];
+
+ /* Do path-iteration on the host: enqueue path-iteration kernels. */
+ for(int PathIter = 0; PathIter < PathIteration_times; PathIter++) {
+ ENQUEUE_SPLIT_KERNEL(scene_intersect, global_size, local_size);
+ ENQUEUE_SPLIT_KERNEL(lamp_emission, global_size, local_size);
+ ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
+ ENQUEUE_SPLIT_KERNEL(background_buffer_update, global_size, local_size);
+ ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size);
+ ENQUEUE_SPLIT_KERNEL(holdout_emission_blurring_pathtermination_ao, global_size, local_size);
+ ENQUEUE_SPLIT_KERNEL(direct_lighting, global_size, local_size);
+ ENQUEUE_SPLIT_KERNEL(shadow_blocked, global_size_shadow_blocked, local_size);
+ ENQUEUE_SPLIT_KERNEL(next_iteration_setup, global_size, local_size);
+
+ if(task->get_cancel()) {
+ canceled = true;
+ break;
+ }
+ }
+
+ /* Read ray-state into host memory to decide if we should exit
+ * path-iteration on the host.
+ */
+ ciErr = clEnqueueReadBuffer(cqCommandQueue,
+ ray_state,
+ CL_TRUE,
+ 0,
+ global_size[0] * global_size[1] * sizeof(char),
+ hostRayStateArray,
+ 0,
+ NULL,
+ NULL);
+ assert(ciErr == CL_SUCCESS);
+
+ activeRaysAvailable = false;
+
+ for(int rayStateIter = 0;
+ rayStateIter < global_size[0] * global_size[1];
+ ++rayStateIter)
+ {
+ if(int8_t(hostRayStateArray[rayStateIter]) != RAY_INACTIVE) {
+ /* Not all rays are RAY_INACTIVE. */
+ activeRaysAvailable = true;
+ break;
+ }
+ }
+
+ if(activeRaysAvailable) {
+ numHostIntervention++;
+ PathIteration_times = PATH_ITER_INC_FACTOR;
+ /* Host intervention was needed before all rays became RAY_INACTIVE;
+ * do more initial iterations for the next tile.
+ */
+ numNextPathIterTimes += PATH_ITER_INC_FACTOR;
+ }
+
+ if(task->get_cancel()) {
+ canceled = true;
+ break;
+ }
+ }
+
+ /* Execute SumALLRadiance kernel to accumulate radiance calculated in
+ * per_sample_output_buffers into RenderTile's output buffer.
+ */
+ if(!canceled) {
+ size_t sum_all_radiance_local_size[2] = {16, 16};
+ size_t sum_all_radiance_global_size[2];
+ sum_all_radiance_global_size[0] =
+ (((d_w - 1) / sum_all_radiance_local_size[0]) + 1) *
+ sum_all_radiance_local_size[0];
+ sum_all_radiance_global_size[1] =
+ (((d_h - 1) / sum_all_radiance_local_size[1]) + 1) *
+ sum_all_radiance_local_size[1];
+ ENQUEUE_SPLIT_KERNEL(sum_all_radiance,
+ sum_all_radiance_global_size,
+ sum_all_radiance_local_size);
+ }
+
+#undef ENQUEUE_SPLIT_KERNEL
+#undef GLUE
+
+ if(numHostIntervention == 0) {
+ /* This means we are executing the kernel more often than required;
+ * avoid this for the next sample/tile.
+ */
+ PathIteration_times = ((numNextPathIterTimes - PATH_ITER_INC_FACTOR) <= 0) ?
+ PATH_ITER_INC_FACTOR : numNextPathIterTimes - PATH_ITER_INC_FACTOR;
+ }
+ else {
+ /* The number of path-iterations done for this tile is used as the
+ * initial path-iteration count for the next tile.
+ */
+ PathIteration_times = numNextPathIterTimes;
+ }
+
+ first_tile = false;
+ }
+
+ /* Calculates the amount of memory that always has to be
+ * allocated in order for the split kernel to function.
+ * This memory is tile/scene-property invariant (meaning,
+ * the value returned by this function does not depend
+ * on the user-set tile size or scene properties).
+ */
+ size_t get_invariable_mem_allocated()
+ {
+ size_t total_invariable_mem_allocated = 0;
+ size_t KernelGlobals_size = 0;
+
+ KernelGlobals_size = get_KernelGlobals_size();
+
+ total_invariable_mem_allocated += KernelGlobals_size; /* KernelGlobals size */
+ total_invariable_mem_allocated += NUM_QUEUES * sizeof(unsigned int); /* Queue index size */
+ total_invariable_mem_allocated += sizeof(char); /* use_queues_flag size */
+
+ return total_invariable_mem_allocated;
+ }
+
+ /* Calculate the tile-specific memory that has to be (or has been)
+ * allocated for the split kernel to function.
+ */
+ size_t get_tile_specific_mem_allocated(const int2 tile_size)
+ {
+ size_t tile_specific_mem_allocated = 0;
+
+ /* Get required tile info */
+ unsigned int user_set_tile_w = tile_size.x;
+ unsigned int user_set_tile_h = tile_size.y;
+
+#ifdef __WORK_STEALING__
+ /* Calculate memory to be allocated for work_pools in
+ * case of work_stealing.
+ */
+ size_t max_global_size[2];
+ size_t max_num_work_pools = 0;
+ max_global_size[0] =
+ (((user_set_tile_w - 1) / SPLIT_KERNEL_LOCAL_SIZE_X) + 1) *
+ SPLIT_KERNEL_LOCAL_SIZE_X;
+ max_global_size[1] =
+ (((user_set_tile_h - 1) / SPLIT_KERNEL_LOCAL_SIZE_Y) + 1) *
+ SPLIT_KERNEL_LOCAL_SIZE_Y;
+ max_num_work_pools =
+ (max_global_size[0] * max_global_size[1]) /
+ (SPLIT_KERNEL_LOCAL_SIZE_X * SPLIT_KERNEL_LOCAL_SIZE_Y);
+ tile_specific_mem_allocated += max_num_work_pools * sizeof(unsigned int);
+#endif
+
+ tile_specific_mem_allocated +=
+ user_set_tile_w * user_set_tile_h * per_thread_output_buffer_size;
+ tile_specific_mem_allocated +=
+ user_set_tile_w * user_set_tile_h * sizeof(RNG);
+
+ return tile_specific_mem_allocated;
+ }
+
+ /* Calculates the texture memory and KernelData (d_data) memory
+ * that has been allocated.
+ */
+ size_t get_scene_specific_mem_allocated(cl_mem d_data)
+ {
+ size_t scene_specific_mem_allocated = 0;
+ /* Calculate texture memories. */
+#define KERNEL_TEX(type, ttype, name) \
+ scene_specific_mem_allocated += get_tex_size(#name);
+#include "kernel_textures.h"
+#undef KERNEL_TEX
+ size_t d_data_size;
+ ciErr = clGetMemObjectInfo(d_data,
+ CL_MEM_SIZE,
+ sizeof(d_data_size),
+ &d_data_size,
+ NULL);
+ assert(ciErr == CL_SUCCESS && "Can't get d_data mem object info");
+ scene_specific_mem_allocated += d_data_size;
+ return scene_specific_mem_allocated;
+ }
+
+ /* Calculate the memory required for one thread in split kernel. */
+ size_t get_per_thread_memory()
+ {
+ size_t shaderdata_size = 0;
+ /* TODO(sergey): This will actually over-allocate if
+ * the particular kernel does not support multiclosure.
+ */
+ shaderdata_size = get_shader_data_size(current_max_closure);
+ size_t retval = sizeof(RNG)
+ + sizeof(float3) /* Throughput size */
+ + sizeof(float) /* L transparent size */
+ + sizeof(char) /* Ray state size */
+ + sizeof(unsigned int) /* Work element size */
+ + sizeof(int) /* ISLamp_size */
+ + sizeof(PathRadiance) + sizeof(Ray) + sizeof(PathState)
+ + sizeof(Intersection) /* Overall isect */
+ + sizeof(Intersection) /* Intersection_coop_AO */
+ + sizeof(Intersection) /* Intersection coop DL */
+ + shaderdata_size /* Overall ShaderData */
+ + (shaderdata_size * 2) /* ShaderData : DL and shadow */
+ + sizeof(Ray) + sizeof(BsdfEval)
+ + sizeof(float3) /* AOAlpha size */
+ + sizeof(float3) /* AOBSDF size */
+ + sizeof(Ray)
+ + (sizeof(int) * NUM_QUEUES)
+ + per_thread_output_buffer_size;
+ return retval;
+ }
+
+ /* Considers the total memory available in the device
+ * and returns the maximum global work size possible.
+ */
+ size_t get_feasible_global_work_size(int2 tile_size, cl_mem d_data)
+ {
+ /* Calculate invariably allocated memory. */
+ size_t invariable_mem_allocated = get_invariable_mem_allocated();
+ /* Calculate tile specific allocated memory. */
+ size_t tile_specific_mem_allocated =
+ get_tile_specific_mem_allocated(tile_size);
+ /* Calculate scene specific allocated memory. */
+ size_t scene_specific_mem_allocated =
+ get_scene_specific_mem_allocated(d_data);
+ /* Calculate total memory available for the threads in global work size. */
+ size_t available_memory = total_allocatable_memory
+ - invariable_mem_allocated
+ - tile_specific_mem_allocated
+ - scene_specific_mem_allocated
+ - DATA_ALLOCATION_MEM_FACTOR;
+ size_t per_thread_memory_required = get_per_thread_memory();
+ return (available_memory / per_thread_memory_required);
+ }
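+
+ /* Rough worked example (illustrative numbers only; the real values
+ * depend on the device and scene): with about 1 GiB of memory left
+ * after the fixed allocations and about 4 KiB required per thread,
+ * this returns 1073741824 / 4096 = 262144 threads, enough for a
+ * 512x512 tile.
+ */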
+
+ /* Checks if the device has enough memory to render the whole tile;
+ * if not, we should split the single tile into multiple smaller tiles
+ * and process them all.
+ */
+ bool need_to_split_tile(unsigned int d_w,
+ unsigned int d_h,
+ int2 max_render_feasible_tile_size)
+ {
+ size_t global_size_estimate[2];
+ /* TODO(sergey): Such round-ups are in quite few places, need to replace
+ * them with an utility macro.
+ */
+ global_size_estimate[0] =
+ (((d_w - 1) / SPLIT_KERNEL_LOCAL_SIZE_X) + 1) *
+ SPLIT_KERNEL_LOCAL_SIZE_X;
+ global_size_estimate[1] =
+ (((d_h - 1) / SPLIT_KERNEL_LOCAL_SIZE_Y) + 1) *
+ SPLIT_KERNEL_LOCAL_SIZE_Y;
+ if((global_size_estimate[0] * global_size_estimate[1]) >
+ (max_render_feasible_tile_size.x * max_render_feasible_tile_size.y))
+ {
+ return true;
+ }
+ else {
+ return false;
+ }
+ }
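+
+ /* A minimal sketch of the round-up helper the TODO above asks for
+ * (hypothetical; not part of this patch):
+ *
+ * #define ROUND_UP_TO_MULTIPLE(x, m) ((((x) - 1) / (m) + 1) * (m))
+ *
+ * e.g. ROUND_UP_TO_MULTIPLE(70, 64) == 128, matching the round-up
+ * expressions used throughout this file.
+ */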
+
+ /* Considers the scene properties, global memory available in the device
+ * and returns a rectangular tile dimension (approximately the maximum)
+ * that the split kernel should render.
+ */
+ int2 get_max_render_feasible_tile_size(size_t feasible_global_work_size)
+ {
+ int2 max_render_feasible_tile_size;
+ int square_root_val = (int)sqrt(feasible_global_work_size);
+ max_render_feasible_tile_size.x = square_root_val;
+ max_render_feasible_tile_size.y = square_root_val;
+ /* Ceil round-off max_render_feasible_tile_size. */
+ int2 ceil_render_feasible_tile_size;
+ ceil_render_feasible_tile_size.x =
+ (((max_render_feasible_tile_size.x - 1) / SPLIT_KERNEL_LOCAL_SIZE_X) + 1) *
+ SPLIT_KERNEL_LOCAL_SIZE_X;
+ ceil_render_feasible_tile_size.y =
+ (((max_render_feasible_tile_size.y - 1) / SPLIT_KERNEL_LOCAL_SIZE_Y) + 1) *
+ SPLIT_KERNEL_LOCAL_SIZE_Y;
+ if(ceil_render_feasible_tile_size.x * ceil_render_feasible_tile_size.y <=
+ feasible_global_work_size)
+ {
+ return ceil_render_feasible_tile_size;
+ }
+ /* Floor round-off max_render_feasible_tile_size. */
+ int2 floor_render_feasible_tile_size;
+ floor_render_feasible_tile_size.x =
+ (max_render_feasible_tile_size.x / SPLIT_KERNEL_LOCAL_SIZE_X) *
+ SPLIT_KERNEL_LOCAL_SIZE_X;
+ floor_render_feasible_tile_size.y =
+ (max_render_feasible_tile_size.y / SPLIT_KERNEL_LOCAL_SIZE_Y) *
+ SPLIT_KERNEL_LOCAL_SIZE_Y;
+ return floor_render_feasible_tile_size;
+ }
+
+ /* Try splitting the current tile into multiple smaller
+ * almost-square-tiles.
+ */
+ int2 get_split_tile_size(RenderTile rtile,
+ int2 max_render_feasible_tile_size)
+ {
+ int2 split_tile_size;
+ int num_global_threads = max_render_feasible_tile_size.x *
+ max_render_feasible_tile_size.y;
+ int d_w = rtile.w;
+ int d_h = rtile.h;
+ /* Ceil round off d_w and d_h */
+ d_w = (((d_w - 1) / SPLIT_KERNEL_LOCAL_SIZE_X) + 1) *
+ SPLIT_KERNEL_LOCAL_SIZE_X;
+ d_h = (((d_h - 1) / SPLIT_KERNEL_LOCAL_SIZE_Y) + 1) *
+ SPLIT_KERNEL_LOCAL_SIZE_Y;
+ while(d_w * d_h > num_global_threads) {
+ /* Halve the longer dimension. */
+ if(d_w >= d_h) {
+ d_w = d_w / 2;
+ d_w = (((d_w - 1) / SPLIT_KERNEL_LOCAL_SIZE_X) + 1) *
+ SPLIT_KERNEL_LOCAL_SIZE_X;
+ }
+ else {
+ d_h = d_h / 2;
+ d_h = (((d_h - 1) / SPLIT_KERNEL_LOCAL_SIZE_Y) + 1) *
+ SPLIT_KERNEL_LOCAL_SIZE_Y;
+ }
+ }
+ split_tile_size.x = d_w;
+ split_tile_size.y = d_h;
+ return split_tile_size;
+ }
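+
+ /* Illustrative example (hypothetical numbers, assuming the local
+ * work-group size divides them): with a limit of 32768 global threads
+ * and a 256x256 tile, the loop halves the width once, giving a
+ * 128x256 split tile (128 * 256 = 32768 no longer exceeds the limit).
+ */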
+
+ /* Splits the existing tile into multiple tiles of size split_tile_size. */
+ vector<SplitRenderTile> split_tiles(RenderTile rtile, int2 split_tile_size)
+ {
+ vector<SplitRenderTile> to_path_trace_rtile;
+ int d_w = rtile.w;
+ int d_h = rtile.h;
+ int num_tiles_x = (((d_w - 1) / split_tile_size.x) + 1);
+ int num_tiles_y = (((d_h - 1) / split_tile_size.y) + 1);
+ /* Buffer and rng_state offset calc. */
+ size_t offset_index = rtile.offset + (rtile.x + rtile.y * rtile.stride);
+ size_t offset_x = offset_index % rtile.stride;
+ size_t offset_y = offset_index / rtile.stride;
+ /* Resize to_path_trace_rtile. */
+ to_path_trace_rtile.resize(num_tiles_x * num_tiles_y);
+ for(int tile_iter_y = 0; tile_iter_y < num_tiles_y; tile_iter_y++) {
+ for(int tile_iter_x = 0; tile_iter_x < num_tiles_x; tile_iter_x++) {
+ int rtile_index = tile_iter_y * num_tiles_x + tile_iter_x;
+ to_path_trace_rtile[rtile_index].rng_state_offset_x = offset_x + tile_iter_x * split_tile_size.x;
+ to_path_trace_rtile[rtile_index].rng_state_offset_y = offset_y + tile_iter_y * split_tile_size.y;
+ to_path_trace_rtile[rtile_index].buffer_offset_x = offset_x + tile_iter_x * split_tile_size.x;
+ to_path_trace_rtile[rtile_index].buffer_offset_y = offset_y + tile_iter_y * split_tile_size.y;
+ to_path_trace_rtile[rtile_index].start_sample = rtile.start_sample;
+ to_path_trace_rtile[rtile_index].num_samples = rtile.num_samples;
+ to_path_trace_rtile[rtile_index].sample = rtile.sample;
+ to_path_trace_rtile[rtile_index].resolution = rtile.resolution;
+ to_path_trace_rtile[rtile_index].offset = rtile.offset;
+ to_path_trace_rtile[rtile_index].buffers = rtile.buffers;
+ to_path_trace_rtile[rtile_index].buffer = rtile.buffer;
+ to_path_trace_rtile[rtile_index].rng_state = rtile.rng_state;
+ to_path_trace_rtile[rtile_index].x = rtile.x + (tile_iter_x * split_tile_size.x);
+ to_path_trace_rtile[rtile_index].y = rtile.y + (tile_iter_y * split_tile_size.y);
+ to_path_trace_rtile[rtile_index].buffer_rng_state_stride = rtile.stride;
+ /* Fill width and height of the new render tile. */
+ to_path_trace_rtile[rtile_index].w = (tile_iter_x == (num_tiles_x - 1)) ?
+ (d_w - (tile_iter_x * split_tile_size.x)) /* Border tile */
+ : split_tile_size.x;
+ to_path_trace_rtile[rtile_index].h = (tile_iter_y == (num_tiles_y - 1)) ?
+ (d_h - (tile_iter_y * split_tile_size.y)) /* Border tile */
+ : split_tile_size.y;
+ to_path_trace_rtile[rtile_index].stride = to_path_trace_rtile[rtile_index].w;
+ }
+ }
+ return to_path_trace_rtile;
+ }
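+
+ /* Illustrative example (hypothetical numbers): splitting a 300x200
+ * tile with split_tile_size 128x128 gives num_tiles_x = 3 and
+ * num_tiles_y = 2, i.e. six tiles; the right border tiles get
+ * w = 300 - 2*128 = 44, the bottom border tiles get h = 200 - 128 = 72,
+ * and all other tiles are 128x128.
+ */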
+
+ void thread_run(DeviceTask *task)
+ {
+ if(task->type == DeviceTask::FILM_CONVERT) {
+ film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half);
+ }
+ else if(task->type == DeviceTask::SHADER) {
+ shader(*task);
+ }
+ else if(task->type == DeviceTask::PATH_TRACE) {
+ RenderTile tile;
+ bool initialize_data_and_check_render_feasibility = false;
+ bool need_to_split_tiles_further = false;
+ int2 max_render_feasible_tile_size;
+ size_t feasible_global_work_size;
+ const int2 tile_size = task->requested_tile_size;
+ /* Keep rendering tiles until done. */
+ while(task->acquire_tile(this, tile)) {
+ if(!initialize_data_and_check_render_feasibility) {
+ /* Initialize data. */
+ /* Calculate per_thread_output_buffer_size. */
+ size_t output_buffer_size = 0;
+ ciErr = clGetMemObjectInfo((cl_mem)tile.buffer,
+ CL_MEM_SIZE,
+ sizeof(output_buffer_size),
+ &output_buffer_size,
+ NULL);
+ assert(ciErr == CL_SUCCESS && "Can't get tile.buffer mem object info");
+ /* This value is different when running on AMD and NV. */
+ if(background) {
+ /* In offline rendering the number of buffer elements
+ * associated with tile.buffer is the current tile size.
+ */
+ per_thread_output_buffer_size =
+ output_buffer_size / (tile.w * tile.h);
+ }
+ else {
+ /* In interactive rendering, unlike offline rendering, the number of buffer elements
+ * associated with tile.buffer is the entire viewport size.
+ */
+ per_thread_output_buffer_size =
+ output_buffer_size / (tile.buffers->params.width *
+ tile.buffers->params.height);
+ }
+ /* Check render feasibility. */
+ feasible_global_work_size = get_feasible_global_work_size(
+ tile_size,
+ CL_MEM_PTR(const_mem_map["__data"]->device_pointer));
+ max_render_feasible_tile_size =
+ get_max_render_feasible_tile_size(
+ feasible_global_work_size);
+ need_to_split_tiles_further =
+ need_to_split_tile(tile_size.x,
+ tile_size.y,
+ max_render_feasible_tile_size);
+ initialize_data_and_check_render_feasibility = true;
+ }
+ if(need_to_split_tiles_further) {
+ int2 split_tile_size =
+ get_split_tile_size(tile,
+ max_render_feasible_tile_size);
+ vector<SplitRenderTile> to_path_trace_render_tiles =
+ split_tiles(tile, split_tile_size);
+ /* Print message to console */
+ if(background && (to_path_trace_render_tiles.size() > 1)) {
+ fprintf(stderr, "Message : Tiles need to be split "
+ "further inside path trace (due to insufficient "
+ "device-global-memory for split kernel to "
+ "function) \n"
+ "The current tile of dimensions %dx%d is split "
+ "into tiles of dimension %dx%d for render \n",
+ tile.w, tile.h,
+ split_tile_size.x,
+ split_tile_size.y);
+ }
+ /* Process all split tiles. */
+ for(int tile_iter = 0;
+ tile_iter < to_path_trace_render_tiles.size();
+ ++tile_iter)
+ {
+ path_trace(task,
+ to_path_trace_render_tiles[tile_iter],
+ max_render_feasible_tile_size);
+ }
+ }
+ else {
+ /* No splitting required; process the entire tile at once. */
+ /* Render feasible tile size is user-set-tile-size itself. */
+ max_render_feasible_tile_size.x =
+ (((tile_size.x - 1) / SPLIT_KERNEL_LOCAL_SIZE_X) + 1) *
+ SPLIT_KERNEL_LOCAL_SIZE_X;
+ max_render_feasible_tile_size.y =
+ (((tile_size.y - 1) / SPLIT_KERNEL_LOCAL_SIZE_Y) + 1) *
+ SPLIT_KERNEL_LOCAL_SIZE_Y;
+ /* buffer_rng_state_stride is stride itself. */
+ SplitRenderTile split_tile(tile);
+ split_tile.buffer_rng_state_stride = tile.stride;
+ path_trace(task, split_tile, max_render_feasible_tile_size);
+ }
+ tile.sample = tile.start_sample + tile.num_samples;
+
+ /* Complete kernel execution before releasing the tile. */
+ /* This helps in multi-device rendering;
+ * the device that reaches the critical-section function
+ * release_tile waits (stalling other devices from entering
+ * release_tile) for all kernels to complete. If device1 (a
+ * slow-render device) reached release_tile first, it would
+ * stall device2 (a fast-render device) from proceeding to render
+ * the next tile.
+ */
+ clFinish(cqCommandQueue);
+
+ task->release_tile(tile);
+ }
+ }
+ }
+
+protected:
+ cl_mem mem_alloc(size_t bufsize, cl_mem_flags mem_flag = CL_MEM_READ_WRITE)
+ {
+ cl_mem ptr;
+ assert(bufsize != 0);
+ ptr = clCreateBuffer(cxContext, mem_flag, bufsize, NULL, &ciErr);
+ opencl_assert_err(ciErr, "clCreateBuffer");
+ return ptr;
+ }
+
+ /* ** These helpers work around some compiler-specific bugs. ** */
+
+ string build_options_for_base_program(
+ const DeviceRequestedFeatures& requested_features)
+ {
+ return requested_features.get_build_options();
+ }
+};
+
+Device *opencl_create_split_device(DeviceInfo& info, Stats& stats, bool background)
+{
+ return new OpenCLDeviceSplitKernel(info, stats, background);
+}
+
+CCL_NAMESPACE_END
+
+#endif /* WITH_OPENCL */
diff --git a/intern/cycles/device/opencl/opencl_util.cpp b/intern/cycles/device/opencl/opencl_util.cpp
new file mode 100644
index 0000000..82e1640
--- /dev/null
+++ b/intern/cycles/device/opencl/opencl_util.cpp
@@ -0,0 +1,822 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef WITH_OPENCL
+
+#include "opencl.h"
+
+#include "util_logging.h"
+#include "util_path.h"
+#include "util_time.h"
+
+using std::cerr;
+using std::endl;
+
+CCL_NAMESPACE_BEGIN
+
+OpenCLCache::Slot::ProgramEntry::ProgramEntry()
+ : program(NULL),
+ mutex(NULL)
+{
+}
+
+OpenCLCache::Slot::ProgramEntry::ProgramEntry(const ProgramEntry& rhs)
+ : program(rhs.program),
+ mutex(NULL)
+{
+}
+
+OpenCLCache::Slot::ProgramEntry::~ProgramEntry()
+{
+ delete mutex;
+}
+
+OpenCLCache::Slot::Slot()
+ : context_mutex(NULL),
+ context(NULL)
+{
+}
+
+OpenCLCache::Slot::Slot(const Slot& rhs)
+ : context_mutex(NULL),
+ context(NULL),
+ programs(rhs.programs)
+{
+}
+
+OpenCLCache::Slot::~Slot()
+{
+ delete context_mutex;
+}
+
+OpenCLCache& OpenCLCache::global_instance()
+{
+ static OpenCLCache instance;
+ return instance;
+}
+
+cl_context OpenCLCache::get_context(cl_platform_id platform,
+ cl_device_id device,
+ thread_scoped_lock& slot_locker)
+{
+ assert(platform != NULL);
+
+ OpenCLCache& self = global_instance();
+
+ thread_scoped_lock cache_lock(self.cache_lock);
+
+ pair<CacheMap::iterator,bool> ins = self.cache.insert(
+ CacheMap::value_type(PlatformDevicePair(platform, device), Slot()));
+
+ Slot &slot = ins.first->second;
+
+ /* create slot lock only while holding cache lock */
+ if(!slot.context_mutex)
+ slot.context_mutex = new thread_mutex;
+
+ /* need to unlock cache before locking slot, to allow store to complete */
+ cache_lock.unlock();
+
+ /* lock the slot */
+ slot_locker = thread_scoped_lock(*slot.context_mutex);
+
+ /* If the context isn't cached. */
+ if(slot.context == NULL) {
+ /* return with the caller's lock holder holding the slot lock */
+ return NULL;
+ }
+
+ /* the item was already cached, release the slot lock */
+ slot_locker.unlock();
+
+ cl_int ciErr = clRetainContext(slot.context);
+ assert(ciErr == CL_SUCCESS);
+ (void)ciErr;
+
+ return slot.context;
+}
+
+cl_program OpenCLCache::get_program(cl_platform_id platform,
+ cl_device_id device,
+ ustring key,
+ thread_scoped_lock& slot_locker)
+{
+ assert(platform != NULL);
+
+ OpenCLCache& self = global_instance();
+
+ thread_scoped_lock cache_lock(self.cache_lock);
+
+ pair<CacheMap::iterator,bool> ins = self.cache.insert(
+ CacheMap::value_type(PlatformDevicePair(platform, device), Slot()));
+
+ Slot &slot = ins.first->second;
+
+ pair<Slot::EntryMap::iterator,bool> ins2 = slot.programs.insert(
+ Slot::EntryMap::value_type(key, Slot::ProgramEntry()));
+
+ Slot::ProgramEntry &entry = ins2.first->second;
+
+ /* create slot lock only while holding cache lock */
+ if(!entry.mutex)
+ entry.mutex = new thread_mutex;
+
+ /* need to unlock cache before locking slot, to allow store to complete */
+ cache_lock.unlock();
+
+ /* lock the slot */
+ slot_locker = thread_scoped_lock(*entry.mutex);
+
+ /* If the program isn't cached. */
+ if(entry.program == NULL) {
+ /* return with the caller's lock holder holding the slot lock */
+ return NULL;
+ }
+
+ /* the item was already cached, release the slot lock */
+ slot_locker.unlock();
+
+ cl_int ciErr = clRetainProgram(entry.program);
+ assert(ciErr == CL_SUCCESS);
+ (void)ciErr;
+
+ return entry.program;
+}
+
+void OpenCLCache::store_context(cl_platform_id platform,
+ cl_device_id device,
+ cl_context context,
+ thread_scoped_lock& slot_locker)
+{
+ assert(platform != NULL);
+ assert(device != NULL);
+ assert(context != NULL);
+
+ OpenCLCache &self = global_instance();
+
+ thread_scoped_lock cache_lock(self.cache_lock);
+ CacheMap::iterator i = self.cache.find(PlatformDevicePair(platform, device));
+ cache_lock.unlock();
+
+ /* sanity check */
+ assert(i != self.cache.end());
+
+ Slot &slot = i->second;
+ assert(slot.context == NULL);
+
+ slot.context = context;
+
+ /* unlock the slot */
+ slot_locker.unlock();
+
+ /* increment reference count in OpenCL.
+ * The caller is going to release the object when done with it. */
+ cl_int ciErr = clRetainContext(context);
+ assert(ciErr == CL_SUCCESS);
+ (void)ciErr;
+}
+
+void OpenCLCache::store_program(cl_platform_id platform,
+ cl_device_id device,
+ cl_program program,
+ ustring key,
+ thread_scoped_lock& slot_locker)
+{
+ assert(platform != NULL);
+ assert(device != NULL);
+ assert(program != NULL);
+
+ OpenCLCache &self = global_instance();
+
+ thread_scoped_lock cache_lock(self.cache_lock);
+
+ CacheMap::iterator i = self.cache.find(PlatformDevicePair(platform, device));
+ assert(i != self.cache.end());
+ Slot &slot = i->second;
+
+ Slot::EntryMap::iterator i2 = slot.programs.find(key);
+ assert(i2 != slot.programs.end());
+ Slot::ProgramEntry &entry = i2->second;
+
+ assert(entry.program == NULL);
+
+ cache_lock.unlock();
+
+ entry.program = program;
+
+ /* unlock the slot */
+ slot_locker.unlock();
+
+ /* Increment reference count in OpenCL.
+ * The caller is going to release the object when done with it.
+ */
+ cl_int ciErr = clRetainProgram(program);
+ assert(ciErr == CL_SUCCESS);
+ (void)ciErr;
+}
+
+string OpenCLCache::get_kernel_md5()
+{
+ OpenCLCache &self = global_instance();
+ thread_scoped_lock lock(self.kernel_md5_lock);
+
+ if(self.kernel_md5.empty()) {
+ self.kernel_md5 = path_files_md5_hash(path_get("kernel"));
+ }
+ return self.kernel_md5;
+}
+
+OpenCLDeviceBase::OpenCLProgram::OpenCLProgram(OpenCLDeviceBase *device,
+ string program_name,
+ string kernel_file,
+ string kernel_build_options,
+ bool use_stdout)
+ : device(device),
+ program_name(program_name),
+ kernel_file(kernel_file),
+ kernel_build_options(kernel_build_options),
+ use_stdout(use_stdout)
+{
+ loaded = false;
+ program = NULL;
+}
+
+OpenCLDeviceBase::OpenCLProgram::~OpenCLProgram()
+{
+ release();
+}
+
+void OpenCLDeviceBase::OpenCLProgram::release()
+{
+ for(map<ustring, cl_kernel>::iterator kernel = kernels.begin(); kernel != kernels.end(); ++kernel) {
+ if(kernel->second) {
+ clReleaseKernel(kernel->second);
+ kernel->second = NULL;
+ }
+ }
+ if(program) {
+ clReleaseProgram(program);
+ program = NULL;
+ }
+}
+
+void OpenCLDeviceBase::OpenCLProgram::add_log(string msg, bool debug)
+{
+ if(!use_stdout) {
+ log += msg + "\n";
+ }
+ else if(!debug) {
+ printf("%s\n", msg.c_str());
+ }
+ else {
+ VLOG(2) << msg;
+ }
+}
+
+void OpenCLDeviceBase::OpenCLProgram::add_error(string msg)
+{
+ if(use_stdout) {
+ fprintf(stderr, "%s\n", msg.c_str());
+ }
+ if(error_msg == "") {
+ error_msg += "\n";
+ }
+ error_msg += msg;
+}
+
+void OpenCLDeviceBase::OpenCLProgram::add_kernel(ustring name)
+{
+ if(!kernels.count(name)) {
+ kernels[name] = NULL;
+ }
+}
+
+bool OpenCLDeviceBase::OpenCLProgram::build_kernel(const string *debug_src)
+{
+ string build_options;
+ build_options = device->kernel_build_options(debug_src) + kernel_build_options;
+
+ cl_int ciErr = clBuildProgram(program, 0, NULL, build_options.c_str(), NULL, NULL);
+
+ /* show warnings even if build is successful */
+ size_t ret_val_size = 0;
+
+ clGetProgramBuildInfo(program, device->cdDevice, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
+
+ if(ciErr != CL_SUCCESS) {
+ add_error(string("OpenCL build failed with error ") + clewErrorString(ciErr) + ", errors in console.");
+ }
+
+ if(ret_val_size > 1) {
+ vector<char> build_log(ret_val_size + 1);
+ clGetProgramBuildInfo(program, device->cdDevice, CL_PROGRAM_BUILD_LOG, ret_val_size, &build_log[0], NULL);
+
+ build_log[ret_val_size] = '\0';
+ /* Skip meaningless empty output from the NVidia compiler. */
+ if(!(ret_val_size == 2 && build_log[0] == '\n')) {
+ add_log(string("OpenCL program ") + program_name + " build output: " + string(&build_log[0]), ciErr == CL_SUCCESS);
+ }
+ }
+
+ return (ciErr == CL_SUCCESS);
+}
+
+bool OpenCLDeviceBase::OpenCLProgram::compile_kernel(const string *debug_src)
+{
+ string source = "#include \"kernels/opencl/" + kernel_file + "\" // " + OpenCLCache::get_kernel_md5() + "\n";
+ /* We compile kernels consisting of many files. Unfortunately, OpenCL
+ * kernel caches do not seem to recognize changes in included files,
+ * so we force a recompile on changes by adding the MD5 hash of all files.
+ */
+ source = path_source_replace_includes(source, path_get("kernel"));
+
+ if(debug_src) {
+ path_write_text(*debug_src, source);
+ }
+
+ size_t source_len = source.size();
+ const char *source_str = source.c_str();
+ cl_int ciErr;
+
+ program = clCreateProgramWithSource(device->cxContext,
+ 1,
+ &source_str,
+ &source_len,
+ &ciErr);
+
+ if(ciErr != CL_SUCCESS) {
+ add_error(string("OpenCL program creation failed: ") + clewErrorString(ciErr));
+ return false;
+ }
+
+ double starttime = time_dt();
+ add_log(string("Compiling OpenCL program ") + program_name.c_str(), false);
+ add_log(string("Build flags: ") + kernel_build_options, true);
+
+ if(!build_kernel(debug_src))
+ return false;
+
+ add_log(string("Kernel compilation of ") + program_name + " finished in " + string_printf("%.2lfs.\n", time_dt() - starttime), false);
+
+ return true;
+}
+
+bool OpenCLDeviceBase::OpenCLProgram::load_binary(const string& clbin,
+ const string *debug_src)
+{
+ /* read binary into memory */
+ vector<uint8_t> binary;
+
+ if(!path_read_binary(clbin, binary)) {
+ add_error(string_printf("OpenCL failed to read cached binary %s.", clbin.c_str()));
+ return false;
+ }
+
+ /* create program */
+ cl_int status, ciErr;
+ size_t size = binary.size();
+ const uint8_t *bytes = &binary[0];
+
+ program = clCreateProgramWithBinary(device->cxContext, 1, &device->cdDevice,
+ &size, &bytes, &status, &ciErr);
+
+ if(status != CL_SUCCESS || ciErr != CL_SUCCESS) {
+ add_error(string("OpenCL failed create program from cached binary ") + clbin + ": "
+ + clewErrorString(status) + " " + clewErrorString(ciErr));
+ return false;
+ }
+
+ if(!build_kernel(debug_src))
+ return false;
+
+ return true;
+}
+
+bool OpenCLDeviceBase::OpenCLProgram::save_binary(const string& clbin)
+{
+ size_t size = 0;
+ clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL);
+
+ if(!size)
+ return false;
+
+ vector<uint8_t> binary(size);
+ uint8_t *bytes = &binary[0];
+
+ clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(uint8_t*), &bytes, NULL);
+
+ return path_write_binary(clbin, binary);
+}
+
+void OpenCLDeviceBase::OpenCLProgram::load()
+{
+ assert(device);
+
+ loaded = false;
+
+ string device_md5 = device->device_md5_hash(kernel_build_options);
+
+ /* Try to use cached kernel. */
+ thread_scoped_lock cache_locker;
+ ustring cache_key(program_name + device_md5);
+ program = device->load_cached_kernel(cache_key,
+ cache_locker);
+
+ if(!program) {
+ add_log(string("OpenCL program ") + program_name + " not found in cache.", true);
+
+ string basename = "cycles_kernel_" + program_name + "_" + device_md5 + "_" + OpenCLCache::get_kernel_md5();
+ basename = path_cache_get(path_join("kernels", basename));
+ string clbin = basename + ".clbin";
+
+ /* path to preprocessed source for debugging */
+ string clsrc, *debug_src = NULL;
+
+ if(OpenCLInfo::use_debug()) {
+ clsrc = basename + ".cl";
+ debug_src = &clsrc;
+ }
+
+ /* If the binary kernel exists already, try to use it. */
+ if(path_exists(clbin) && load_binary(clbin)) {
+ /* Kernel loaded from binary, nothing to do. */
+ add_log(string("Loaded program from ") + clbin + ".", true);
+ }
+ else {
+ add_log(string("Kernel file ") + clbin + " either doesn't exist or failed to be loaded by driver.", true);
+
+ /* If it does not exist or loading the binary failed, compile the kernel. */
+ if(!compile_kernel(debug_src)) {
+ return;
+ }
+
+ /* Save binary for reuse. */
+ if(!save_binary(clbin)) {
+ add_log(string("Saving compiled OpenCL kernel to ") + clbin + " failed!", true);
+ }
+ }
+
+ /* Cache the program. */
+ device->store_cached_kernel(program,
+ cache_key,
+ cache_locker);
+ }
+ else {
+ add_log(string("Found cached OpenCL program ") + program_name + ".", true);
+ }
+
+ for(map<ustring, cl_kernel>::iterator kernel = kernels.begin(); kernel != kernels.end(); ++kernel) {
+ assert(kernel->second == NULL);
+ cl_int ciErr;
+ string name = "kernel_ocl_" + kernel->first.string();
+ kernel->second = clCreateKernel(program, name.c_str(), &ciErr);
+ if(device->opencl_error(ciErr)) {
+ add_error(string("Error getting kernel ") + name + " from program " + program_name + ": " + clewErrorString(ciErr));
+ return;
+ }
+ }
+
+ loaded = true;
+}
+
+void OpenCLDeviceBase::OpenCLProgram::report_error()
+{
+ /* If loaded is true, there was no error. */
+ if(loaded) return;
+ /* if use_stdout is true, the error was already reported. */
+ if(use_stdout) return;
+
+ cerr << error_msg << endl;
+ if(!compile_output.empty()) {
+ cerr << "OpenCL kernel build output for " << program_name << ":" << endl;
+ cerr << compile_output << endl;
+ }
+}
+
+cl_kernel OpenCLDeviceBase::OpenCLProgram::operator()()
+{
+ assert(kernels.size() == 1);
+ return kernels.begin()->second;
+}
+
+cl_kernel OpenCLDeviceBase::OpenCLProgram::operator()(ustring name)
+{
+ assert(kernels.count(name));
+ return kernels[name];
+}
+
+cl_device_type OpenCLInfo::device_type()
+{
+ switch(DebugFlags().opencl.device_type)
+ {
+ case DebugFlags::OpenCL::DEVICE_NONE:
+ return 0;
+ case DebugFlags::OpenCL::DEVICE_ALL:
+ return CL_DEVICE_TYPE_ALL;
+ case DebugFlags::OpenCL::DEVICE_DEFAULT:
+ return CL_DEVICE_TYPE_DEFAULT;
+ case DebugFlags::OpenCL::DEVICE_CPU:
+ return CL_DEVICE_TYPE_CPU;
+ case DebugFlags::OpenCL::DEVICE_GPU:
+ return CL_DEVICE_TYPE_GPU;
+ case DebugFlags::OpenCL::DEVICE_ACCELERATOR:
+ return CL_DEVICE_TYPE_ACCELERATOR;
+ default:
+ return CL_DEVICE_TYPE_ALL;
+ }
+}
+
+bool OpenCLInfo::use_debug()
+{
+ return DebugFlags().opencl.debug;
+}
+
+bool OpenCLInfo::kernel_use_advanced_shading(const string& platform)
+{
+ /* keep this in sync with kernel_types.h! */
+ if(platform == "NVIDIA CUDA")
+ return true;
+ else if(platform == "Apple")
+ return true;
+ else if(platform == "AMD Accelerated Parallel Processing")
+ return true;
+ else if(platform == "Intel(R) OpenCL")
+ return true;
+ /* Make sure officially unsupported OpenCL platforms
+ * do not get set up to use advanced shading.
+ */
+ return false;
+}
+
+bool OpenCLInfo::kernel_use_split(const string& platform_name,
+ const cl_device_type device_type)
+{
+ if(DebugFlags().opencl.kernel_type == DebugFlags::OpenCL::KERNEL_SPLIT) {
+ VLOG(1) << "Forcing split kernel to use.";
+ return true;
+ }
+ if(DebugFlags().opencl.kernel_type == DebugFlags::OpenCL::KERNEL_MEGA) {
+ VLOG(1) << "Forcing mega kernel to use.";
+ return false;
+ }
+ /* TODO(sergey): Replace string lookups with a more enum-like API,
+ * similar to the device/vendor checks in Blender's GPU module.
+ */
+ if(platform_name == "AMD Accelerated Parallel Processing" &&
+ device_type == CL_DEVICE_TYPE_GPU)
+ {
+ return true;
+ }
+ return false;
+}
+
+bool OpenCLInfo::device_supported(const string& platform_name,
+ const cl_device_id device_id)
+{
+ cl_device_type device_type;
+ clGetDeviceInfo(device_id,
+ CL_DEVICE_TYPE,
+ sizeof(cl_device_type),
+ &device_type,
+ NULL);
+ if(platform_name == "AMD Accelerated Parallel Processing" &&
+ device_type == CL_DEVICE_TYPE_GPU)
+ {
+ return true;
+ }
+ if(platform_name == "Apple" && device_type == CL_DEVICE_TYPE_GPU) {
+ return true;
+ }
+ return false;
+}
+
+bool OpenCLInfo::platform_version_check(cl_platform_id platform,
+ string *error)
+{
+ const int req_major = 1, req_minor = 1;
+ int major, minor;
+ char version[256];
+ clGetPlatformInfo(platform,
+ CL_PLATFORM_VERSION,
+ sizeof(version),
+ &version,
+ NULL);
+ if(sscanf(version, "OpenCL %d.%d", &major, &minor) < 2) {
+ if(error != NULL) {
+ *error = string_printf("OpenCL: failed to parse platform version string (%s).", version);
+ }
+ return false;
+ }
+ if(!((major == req_major && minor >= req_minor) || (major > req_major))) {
+ if(error != NULL) {
+ *error = string_printf("OpenCL: platform version 1.1 or later required, found %d.%d", major, minor);
+ }
+ return false;
+ }
+ if(error != NULL) {
+ *error = "";
+ }
+ return true;
+}
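+
+/* For example, a platform version string such as
+ * "OpenCL 1.2 AMD-APP (1912.5)" (illustrative) parses to major = 1,
+ * minor = 2 and satisfies the 1.1 requirement checked above.
+ */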
+
+bool OpenCLInfo::device_version_check(cl_device_id device,
+ string *error)
+{
+ const int req_major = 1, req_minor = 1;
+ int major, minor;
+ char version[256];
+ clGetDeviceInfo(device,
+ CL_DEVICE_OPENCL_C_VERSION,
+ sizeof(version),
+ &version,
+ NULL);
+ if(sscanf(version, "OpenCL C %d.%d", &major, &minor) < 2) {
+ if(error != NULL) {
+ *error = string_printf("OpenCL: failed to parse OpenCL C version string (%s).", version);
+ }
+ return false;
+ }
+ if(!((major == req_major && minor >= req_minor) || (major > req_major))) {
+ if(error != NULL) {
+ *error = string_printf("OpenCL: C version 1.1 or later required, found %d.%d", major, minor);
+ }
+ return false;
+ }
+ if(error != NULL) {
+ *error = "";
+ }
+ return true;
+}
+
+string OpenCLInfo::get_hardware_id(string platform_name, cl_device_id device_id)
+{
+ if(platform_name == "AMD Accelerated Parallel Processing" || platform_name == "Apple") {
+ /* Use cl_amd_device_topology extension. */
+ cl_char topology[24];
+ if(clGetDeviceInfo(device_id, 0x4037, sizeof(topology), topology, NULL) == CL_SUCCESS && topology[0] == 1) {
+ return string_printf("%02x:%02x.%01x",
+ (unsigned int)topology[21],
+ (unsigned int)topology[22],
+ (unsigned int)topology[23]);
+ }
+ }
+ else if(platform_name == "NVIDIA CUDA") {
+ /* Use two undocumented options of the cl_nv_device_attribute_query extension. */
+ cl_int bus_id, slot_id;
+ if(clGetDeviceInfo(device_id, 0x4008, sizeof(cl_int), &bus_id, NULL) == CL_SUCCESS &&
+ clGetDeviceInfo(device_id, 0x4009, sizeof(cl_int), &slot_id, NULL) == CL_SUCCESS) {
+ return string_printf("%02x:%02x.%01x",
+ (unsigned int)(bus_id),
+ (unsigned int)(slot_id >> 3),
+ (unsigned int)(slot_id & 0x7));
+ }
+ }
+ /* No general way to get a hardware ID from OpenCL => give up. */
+ return "";
+}
+
+void OpenCLInfo::get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices,
+ bool force_all)
+{
+ const bool force_all_platforms = force_all ||
+ (DebugFlags().opencl.kernel_type != DebugFlags::OpenCL::KERNEL_DEFAULT);
+ const cl_device_type device_type = OpenCLInfo::device_type();
+ static bool first_time = true;
+#define FIRST_VLOG(severity) if(first_time) VLOG(severity)
+
+ usable_devices->clear();
+
+ if(device_type == 0) {
+ FIRST_VLOG(2) << "OpenCL devices are forced to be disabled.";
+ first_time = false;
+ return;
+ }
+
+ vector<cl_device_id> device_ids;
+ cl_uint num_devices = 0;
+ vector<cl_platform_id> platform_ids;
+ cl_uint num_platforms = 0;
+
+ /* Get devices. */
+ if(clGetPlatformIDs(0, NULL, &num_platforms) != CL_SUCCESS ||
+ num_platforms == 0)
+ {
+ FIRST_VLOG(2) << "No OpenCL platforms were found.";
+ first_time = false;
+ return;
+ }
+ platform_ids.resize(num_platforms);
+ if(clGetPlatformIDs(num_platforms, &platform_ids[0], NULL) != CL_SUCCESS) {
+ FIRST_VLOG(2) << "Failed to fetch platform IDs from the driver..";
+ first_time = false;
+ return;
+ }
+ /* Devices are numbered consecutively across platforms. */
+ for(int platform = 0; platform < num_platforms; platform++) {
+ cl_platform_id platform_id = platform_ids[platform];
+ char pname[256];
+ if(clGetPlatformInfo(platform_id,
+ CL_PLATFORM_NAME,
+ sizeof(pname),
+ &pname,
+ NULL) != CL_SUCCESS)
+ {
+ FIRST_VLOG(2) << "Failed to get platform name, ignoring.";
+ continue;
+ }
+ string platform_name = pname;
+ FIRST_VLOG(2) << "Enumerating devices for platform "
+ << platform_name << ".";
+ if(!platform_version_check(platform_id)) {
+ FIRST_VLOG(2) << "Ignoring platform " << platform_name
+ << " due to too old compiler version.";
+ continue;
+ }
+ num_devices = 0;
+ cl_int ciErr;
+ if((ciErr = clGetDeviceIDs(platform_id,
+ device_type,
+ 0,
+ NULL,
+ &num_devices)) != CL_SUCCESS || num_devices == 0)
+ {
+ FIRST_VLOG(2) << "Ignoring platform " << platform_name
+ << ", failed to fetch number of devices: " << string(clewErrorString(ciErr));
+ continue;
+ }
+ device_ids.resize(num_devices);
+ if(clGetDeviceIDs(platform_id,
+ device_type,
+ num_devices,
+ &device_ids[0],
+ NULL) != CL_SUCCESS)
+ {
+ FIRST_VLOG(2) << "Ignoring platform " << platform_name
+ << ", failed to fetch devices list.";
+ continue;
+ }
+ for(int num = 0; num < num_devices; num++) {
+ cl_device_id device_id = device_ids[num];
+ char device_name[1024] = "\0";
+ if(clGetDeviceInfo(device_id,
+ CL_DEVICE_NAME,
+ sizeof(device_name),
+ &device_name,
+ NULL) != CL_SUCCESS)
+ {
+ FIRST_VLOG(2) << "Failed to fetch device name, ignoring.";
+ continue;
+ }
+ if(!device_version_check(device_id)) {
+ FIRST_VLOG(2) << "Ignoring device " << device_name
+ << " due to old compiler version.";
+ continue;
+ }
+ if(force_all_platforms ||
+ device_supported(platform_name, device_id))
+ {
+ cl_device_type device_type;
+ if(clGetDeviceInfo(device_id,
+ CL_DEVICE_TYPE,
+ sizeof(cl_device_type),
+ &device_type,
+ NULL) != CL_SUCCESS)
+ {
+ FIRST_VLOG(2) << "Ignoring device " << device_name
+ << ", failed to fetch device type.";
+ continue;
+ }
+ FIRST_VLOG(2) << "Adding new device " << device_name << ".";
+ string hardware_id = get_hardware_id(platform_name, device_id);
+ usable_devices->push_back(OpenCLPlatformDevice(platform_id,
+ platform_name,
+ device_id,
+ device_type,
+ device_name,
+ hardware_id));
+ }
+ else {
+ FIRST_VLOG(2) << "Ignoring device " << device_name
+ << ", not officially supported yet.";
+ }
+ }
+ }
+ first_time = false;
+}
+
+CCL_NAMESPACE_END
+
+#endif
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index e4341c8..29e0f44 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -57,6 +57,7 @@ set(SRC_HEADERS
kernel_emission.h
kernel_film.h
kernel_globals.h
+ kernel_image_opencl.h
kernel_jitter.h
kernel_light.h
kernel_math.h
@@ -163,6 +164,8 @@ set(SRC_GEOM_HEADERS
geom/geom_curve.h
geom/geom_motion_curve.h
geom/geom_motion_triangle.h
+ geom/geom_motion_triangle_intersect.h
+ geom/geom_motion_triangle_shader.h
geom/geom_object.h
geom/geom_patch.h
geom/geom_primitive.h
@@ -176,6 +179,7 @@ set(SRC_UTIL_HEADERS
../util/util_atomic.h
../util/util_color.h
../util/util_half.h
+ ../util/util_hash.h
../util/util_math.h
../util/util_math_fast.h
../util/util_static_assert.h
diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h
index 71c925a..3679898 100644
--- a/intern/cycles/kernel/bvh/bvh.h
+++ b/intern/cycles/kernel/bvh/bvh.h
@@ -72,13 +72,13 @@ CCL_NAMESPACE_BEGIN
# define BVH_FUNCTION_NAME bvh_intersect_subsurface
# define BVH_FUNCTION_FEATURES BVH_HAIR
# include "bvh_subsurface.h"
-#endif
-#if defined(__SUBSURFACE__) && defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_subsurface_motion
-# define BVH_FUNCTION_FEATURES BVH_MOTION|BVH_HAIR
-# include "bvh_subsurface.h"
-#endif
+# if defined(__OBJECT_MOTION__)
+# define BVH_FUNCTION_NAME bvh_intersect_subsurface_motion
+# define BVH_FUNCTION_FEATURES BVH_MOTION|BVH_HAIR
+# include "bvh_subsurface.h"
+# endif
+#endif /* __SUBSURFACE__ */
/* Volume BVH traversal */
@@ -86,19 +86,19 @@ CCL_NAMESPACE_BEGIN
# define BVH_FUNCTION_NAME bvh_intersect_volume
# define BVH_FUNCTION_FEATURES BVH_HAIR
# include "bvh_volume.h"
-#endif
-
-#if defined(__VOLUME__) && defined(__INSTANCING__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume_instancing
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
-# include "bvh_volume.h"
-#endif
-#if defined(__VOLUME__) && defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION|BVH_HAIR
-# include "bvh_volume.h"
-#endif
+# if defined(__INSTANCING__)
+# define BVH_FUNCTION_NAME bvh_intersect_volume_instancing
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
+# include "bvh_volume.h"
+# endif
+
+# if defined(__OBJECT_MOTION__)
+# define BVH_FUNCTION_NAME bvh_intersect_volume_motion
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION|BVH_HAIR
+# include "bvh_volume.h"
+# endif
+#endif /* __VOLUME__ */
/* Record all intersections - Shadow BVH traversal */
@@ -106,31 +106,31 @@ CCL_NAMESPACE_BEGIN
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all
# define BVH_FUNCTION_FEATURES 0
# include "bvh_shadow_all.h"
-#endif
-
-#if defined(__SHADOW_RECORD_ALL__) && defined(__INSTANCING__)
-# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_instancing
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING
-# include "bvh_shadow_all.h"
-#endif
-
-#if defined(__SHADOW_RECORD_ALL__) && defined(__HAIR__)
-# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
-# include "bvh_shadow_all.h"
-#endif
-
-#if defined(__SHADOW_RECORD_ALL__) && defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
-# include "bvh_shadow_all.h"
-#endif
-#if defined(__SHADOW_RECORD_ALL__) && defined(__HAIR__) && defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_MOTION
-# include "bvh_shadow_all.h"
-#endif
+# if defined(__INSTANCING__)
+# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_instancing
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING
+# include "bvh_shadow_all.h"
+# endif
+
+# if defined(__HAIR__)
+# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
+# include "bvh_shadow_all.h"
+# endif
+
+# if defined(__OBJECT_MOTION__)
+# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
+# include "bvh_shadow_all.h"
+# endif
+
+# if defined(__HAIR__) && defined(__OBJECT_MOTION__)
+# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_MOTION
+# include "bvh_shadow_all.h"
+# endif
+#endif /* __SHADOW_RECORD_ALL__ */
/* Record all intersections - Volume BVH traversal */
@@ -138,19 +138,19 @@ CCL_NAMESPACE_BEGIN
# define BVH_FUNCTION_NAME bvh_intersect_volume_all
# define BVH_FUNCTION_FEATURES BVH_HAIR
# include "bvh_volume_all.h"
-#endif
-
-#if defined(__VOLUME_RECORD_ALL__) && defined(__INSTANCING__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume_all_instancing
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
-# include "bvh_volume_all.h"
-#endif
-#if defined(__VOLUME_RECORD_ALL__) && defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION|BVH_HAIR
-# include "bvh_volume_all.h"
-#endif
+# if defined(__INSTANCING__)
+# define BVH_FUNCTION_NAME bvh_intersect_volume_all_instancing
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
+# include "bvh_volume_all.h"
+# endif
+
+# if defined(__OBJECT_MOTION__)
+# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion
+# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION|BVH_HAIR
+# include "bvh_volume_all.h"
+# endif
+#endif /* __VOLUME_RECORD_ALL__ */
#undef BVH_FEATURE
#undef BVH_NAME_JOIN
diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h
index e9eeff3..df33a86 100644
--- a/intern/cycles/kernel/bvh/bvh_shadow_all.h
+++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h
@@ -108,7 +108,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
do {
/* traverse internal nodes */
while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- int node_addr_ahild1, traverse_mask;
+ int node_addr_child1, traverse_mask;
float dist[2];
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
@@ -141,25 +141,25 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#endif // __KERNEL_SSE2__
node_addr = __float_as_int(cnodes.z);
- node_addr_ahild1 = __float_as_int(cnodes.w);
+ node_addr_child1 = __float_as_int(cnodes.w);
if(traverse_mask == 3) {
/* Both children were intersected, push the farther one. */
bool is_closest_child1 = (dist[1] < dist[0]);
if(is_closest_child1) {
int tmp = node_addr;
- node_addr = node_addr_ahild1;
- node_addr_ahild1 = tmp;
+ node_addr = node_addr_child1;
+ node_addr_child1 = tmp;
}
++stack_ptr;
kernel_assert(stack_ptr < BVH_STACK_SIZE);
- traversal_stack[stack_ptr] = node_addr_ahild1;
+ traversal_stack[stack_ptr] = node_addr_child1;
}
else {
/* One child was intersected. */
if(traverse_mask == 2) {
- node_addr = node_addr_ahild1;
+ node_addr = node_addr_child1;
}
else if(traverse_mask == 0) {
/* Neither child was intersected. */
@@ -187,7 +187,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
/* primitive intersection */
while(prim_addr < prim_addr2) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
bool hit;
@@ -222,6 +222,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_HAIR)
case PRIMITIVE_CURVE:
case PRIMITIVE_MOTION_CURVE: {
+ const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
hit = bvh_cardinal_curve_intersect(kg,
isect_array,
@@ -231,7 +232,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
object,
prim_addr,
ray->time,
- type,
+ curve_type,
NULL,
0, 0);
}
@@ -244,7 +245,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
object,
prim_addr,
ray->time,
- type,
+ curve_type,
NULL,
0, 0);
}
@@ -278,7 +279,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
shader = __float_as_int(str.z);
}
#endif
- int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*2);
+ int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*SHADER_SIZE);
/* if no transparent shadows, all light is blocked */
if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
@@ -343,6 +344,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
if(stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
+ /* Instance pop. */
if(num_hits_in_instance) {
float t_fac;
@@ -355,8 +357,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
triangle_intersect_precalc(dir, &isect_precalc);
/* scale isect->t to adjust for instancing */
- for(int i = 0; i < num_hits_in_instance; i++)
+ for(int i = 0; i < num_hits_in_instance; i++) {
(isect_array-i-1)->t *= t_fac;
+ }
}
else {
float ignore_t = FLT_MAX;
diff --git a/intern/cycles/kernel/bvh/bvh_traversal.h b/intern/cycles/kernel/bvh/bvh_traversal.h
index a0e478e..80c8f31 100644
--- a/intern/cycles/kernel/bvh/bvh_traversal.h
+++ b/intern/cycles/kernel/bvh/bvh_traversal.h
@@ -213,7 +213,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
--stack_ptr;
}
}
- BVH_DEBUG_NEXT_STEP();
+ BVH_DEBUG_NEXT_NODE();
}
/* if node is leaf, fetch triangle list */
@@ -235,7 +235,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
switch(type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
for(; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_STEP();
+ BVH_DEBUG_NEXT_INTERSECTION();
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(triangle_intersect(kg,
&isect_precalc,
@@ -264,7 +264,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
for(; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_STEP();
+ BVH_DEBUG_NEXT_INTERSECTION();
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(motion_triangle_intersect(kg,
isect,
@@ -296,8 +296,9 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
case PRIMITIVE_CURVE:
case PRIMITIVE_MOTION_CURVE: {
for(; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_STEP();
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ BVH_DEBUG_NEXT_INTERSECTION();
+ const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
+ kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
bool hit;
if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
hit = bvh_cardinal_curve_intersect(kg,
@@ -308,7 +309,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
object,
prim_addr,
ray->time,
- type,
+ curve_type,
lcg_state,
difl,
extmax);
@@ -322,7 +323,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
object,
prim_addr,
ray->time,
- type,
+ curve_type,
lcg_state,
difl,
extmax);
diff --git a/intern/cycles/kernel/bvh/bvh_types.h b/intern/cycles/kernel/bvh/bvh_types.h
index c3abe2e..ead424a 100644
--- a/intern/cycles/kernel/bvh/bvh_types.h
+++ b/intern/cycles/kernel/bvh/bvh_types.h
@@ -50,12 +50,17 @@ CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_DEBUG__
# define BVH_DEBUG_INIT() \
do { \
- isect->num_traversal_steps = 0; \
+ isect->num_traversed_nodes = 0; \
isect->num_traversed_instances = 0; \
+ isect->num_intersections = 0; \
} while(0)
-# define BVH_DEBUG_NEXT_STEP() \
+# define BVH_DEBUG_NEXT_NODE() \
do { \
- ++isect->num_traversal_steps; \
+ ++isect->num_traversed_nodes; \
+ } while(0)
+# define BVH_DEBUG_NEXT_INTERSECTION() \
+ do { \
+ ++isect->num_intersections; \
} while(0)
# define BVH_DEBUG_NEXT_INSTANCE() \
do { \
@@ -63,7 +68,8 @@ CCL_NAMESPACE_BEGIN
} while(0)
#else /* __KERNEL_DEBUG__ */
# define BVH_DEBUG_INIT()
-# define BVH_DEBUG_NEXT_STEP()
+# define BVH_DEBUG_NEXT_NODE()
+# define BVH_DEBUG_NEXT_INTERSECTION()
# define BVH_DEBUG_NEXT_INSTANCE()
#endif /* __KERNEL_DEBUG__ */
diff --git a/intern/cycles/kernel/bvh/bvh_volume_all.h b/intern/cycles/kernel/bvh/bvh_volume_all.h
index 1f6515c..529848e 100644
--- a/intern/cycles/kernel/bvh/bvh_volume_all.h
+++ b/intern/cycles/kernel/bvh/bvh_volume_all.h
@@ -99,7 +99,7 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#endif
+#endif /* __KERNEL_SSE2__ */
IsectPrecalc isect_precalc;
triangle_intersect_precalc(dir, &isect_precalc);
@@ -334,6 +334,7 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
if(stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
+ /* Instance pop. */
if(num_hits_in_instance) {
float t_fac;
# if BVH_FEATURE(BVH_MOTION)
@@ -377,7 +378,7 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
}
-#endif /* FEATURE(BVH_MOTION) */
+#endif /* FEATURE(BVH_INSTANCING) */
} while(node_addr != ENTRYPOINT_SENTINEL);
return num_hits;
diff --git a/intern/cycles/kernel/bvh/qbvh_nodes.h b/intern/cycles/kernel/bvh/qbvh_nodes.h
index 2ee2a39..6d22f0b 100644
--- a/intern/cycles/kernel/bvh/qbvh_nodes.h
+++ b/intern/cycles/kernel/bvh/qbvh_nodes.h
@@ -21,6 +21,36 @@ struct QBVHStackItem {
float dist;
};
+ccl_device_inline void qbvh_near_far_idx_calc(const float3& idir,
+ int *ccl_restrict near_x,
+ int *ccl_restrict near_y,
+ int *ccl_restrict near_z,
+ int *ccl_restrict far_x,
+ int *ccl_restrict far_y,
+ int *ccl_restrict far_z)
+
+{
+#ifdef __KERNEL_SSE__
+ *near_x = 0; *far_x = 1;
+ *near_y = 2; *far_y = 3;
+ *near_z = 4; *far_z = 5;
+
+ const size_t mask = movemask(ssef(idir.m128));
+
+ const int mask_x = mask & 1;
+ const int mask_y = (mask & 2) >> 1;
+ const int mask_z = (mask & 4) >> 2;
+
+ *near_x += mask_x; *far_x -= mask_x;
+ *near_y += mask_y; *far_y -= mask_y;
+ *near_z += mask_z; *far_z -= mask_z;
+#else
+ if(idir.x >= 0.0f) { *near_x = 0; *far_x = 1; } else { *near_x = 1; *far_x = 0; }
+ if(idir.y >= 0.0f) { *near_y = 2; *far_y = 3; } else { *near_y = 3; *far_y = 2; }
+ if(idir.z >= 0.0f) { *near_z = 4; *far_z = 5; } else { *near_z = 5; *far_z = 4; }
+#endif
+}
+
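+/* Example (illustrative): for idir = (+x, -y, +z) only bit 1 of the
+ * sign mask from movemask() is set, so near_y/far_y become 3/2 while
+ * the x and z index pairs keep their default order, matching the
+ * scalar fallback above.
+ */
+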
+ /* TODO(sergey): Investigate if using intrinsics helps for both
* stack item swap and float comparison.
*/
diff --git a/intern/cycles/kernel/bvh/qbvh_shadow_all.h b/intern/cycles/kernel/bvh/qbvh_shadow_all.h
index 2e6b6b8..607295f 100644
--- a/intern/cycles/kernel/bvh/qbvh_shadow_all.h
+++ b/intern/cycles/kernel/bvh/qbvh_shadow_all.h
@@ -37,6 +37,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
uint *num_hits)
{
/* TODO(sergey):
+ * - Test if pushing distance on the stack helps.
* - Likely and unlikely for if() statements.
* - Test restrict attribute for pointers.
*/
@@ -74,7 +75,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
int num_hits_in_instance = 0;
#endif
- ssef tnear(0.0f), tfar(tmax);
+ ssef tnear(0.0f), tfar(isect_t);
#if BVH_FEATURE(BVH_HAIR)
sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
#endif
@@ -91,10 +92,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Offsets to select the side that becomes the lower or upper bound. */
int near_x, near_y, near_z;
int far_x, far_y, far_z;
-
- if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
- if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
- if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+ qbvh_near_far_idx_calc(idir,
+ &near_x, &near_y, &near_z,
+ &far_x, &far_y, &far_z);
IsectPrecalc isect_precalc;
triangle_intersect_precalc(dir, &isect_precalc);
@@ -106,14 +106,20 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+ if(false
#ifdef __VISIBILITY_FLAG__
- if((__float_as_uint(inodes.x) & PATH_RAY_SHADOW) == 0) {
+ || ((__float_as_uint(inodes.x) & PATH_RAY_SHADOW) == 0)
+#endif
+#if BVH_FEATURE(BVH_MOTION)
+ || UNLIKELY(ray->time < inodes.y)
+ || UNLIKELY(ray->time > inodes.z)
+#endif
+ ) {
/* Pop. */
node_addr = traversal_stack[stack_ptr].addr;
--stack_ptr;
continue;
}
-#endif
ssef dist;
int child_mask = NODE_INTERSECT(kg,
@@ -122,12 +128,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#ifdef __KERNEL_AVX2__
P_idir4,
#endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
org4,
-# endif
-# if BVH_FEATURE(BVH_HAIR)
+#endif
+#if BVH_FEATURE(BVH_HAIR)
dir4,
-# endif
+#endif
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
@@ -262,7 +268,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Primitive intersection. */
while(prim_addr < prim_addr2) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
bool hit;
@@ -297,6 +303,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_HAIR)
case PRIMITIVE_CURVE:
case PRIMITIVE_MOTION_CURVE: {
+ const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
hit = bvh_cardinal_curve_intersect(kg,
isect_array,
@@ -306,7 +313,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
object,
prim_addr,
ray->time,
- type,
+ curve_type,
NULL,
0, 0);
}
@@ -319,7 +326,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
object,
prim_addr,
ray->time,
- type,
+ curve_type,
NULL,
0, 0);
}
@@ -353,7 +360,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
shader = __float_as_int(str.z);
}
#endif
- int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*2);
+ int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*SHADER_SIZE);
/* if no transparent shadows, all light is blocked */
if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
@@ -391,9 +398,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
num_hits_in_instance = 0;
isect_array->t = isect_t;
- if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
- if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
- if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+ qbvh_near_far_idx_calc(idir,
+ &near_x, &near_y, &near_z,
+ &far_x, &far_y, &far_z);
tfar = ssef(isect_t);
# if BVH_FEATURE(BVH_HAIR)
dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
@@ -424,22 +431,21 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
+ /* Instance pop. */
if(num_hits_in_instance) {
float t_fac;
-
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
# else
bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
# endif
-
- /* scale isect->t to adjust for instancing */
- for(int i = 0; i < num_hits_in_instance; i++)
+ /* Scale isect->t to adjust for instancing. */
+ for(int i = 0; i < num_hits_in_instance; i++) {
(isect_array-i-1)->t *= t_fac;
+ }
}
else {
float ignore_t = FLT_MAX;
-
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
# else
@@ -450,10 +456,10 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
isect_t = tmax;
isect_array->t = isect_t;
- if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
- if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
- if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
- tfar = ssef(tmax);
+ qbvh_near_far_idx_calc(idir,
+ &near_x, &near_y, &near_z,
+ &far_x, &far_y, &far_z);
+ tfar = ssef(isect_t);
# if BVH_FEATURE(BVH_HAIR)
dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
# endif
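Note on the refactor above: the three per-axis near/far index selections that used to be written inline in every traversal loop are now factored into qbvh_near_far_idx_calc(). The helper itself lives elsewhere in the QBVH headers and is not part of this diff; a minimal sketch of what it presumably does, reconstructed from the inline code it replaces, is:

ccl_device_inline void qbvh_near_far_idx_calc(const float3& idir,
                                              int *near_x, int *near_y, int *near_z,
                                              int *far_x, int *far_y, int *far_z)
{
	/* Offsets 0..5 select the packed lower/upper child bounds per axis,
	 * so near/far per axis depends only on the sign of the direction. */
	if(idir.x >= 0.0f) { *near_x = 0; *far_x = 1; } else { *near_x = 1; *far_x = 0; }
	if(idir.y >= 0.0f) { *near_y = 2; *far_y = 3; } else { *near_y = 3; *far_y = 2; }
	if(idir.z >= 0.0f) { *near_z = 4; *far_z = 5; } else { *near_z = 5; *far_z = 4; }
}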
diff --git a/intern/cycles/kernel/bvh/qbvh_subsurface.h b/intern/cycles/kernel/bvh/qbvh_subsurface.h
index 24aca96..ccd36df 100644
--- a/intern/cycles/kernel/bvh/qbvh_subsurface.h
+++ b/intern/cycles/kernel/bvh/qbvh_subsurface.h
@@ -101,10 +101,9 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Offsets to select the side that becomes the lower or upper bound. */
int near_x, near_y, near_z;
int far_x, far_y, far_z;
-
- if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
- if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
- if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+ qbvh_near_far_idx_calc(idir,
+ &near_x, &near_y, &near_z,
+ &far_x, &far_y, &far_z);
IsectPrecalc isect_precalc;
triangle_intersect_precalc(dir, &isect_precalc);
diff --git a/intern/cycles/kernel/bvh/qbvh_traversal.h b/intern/cycles/kernel/bvh/qbvh_traversal.h
index a1e154d..10ae7be 100644
--- a/intern/cycles/kernel/bvh/qbvh_traversal.h
+++ b/intern/cycles/kernel/bvh/qbvh_traversal.h
@@ -102,10 +102,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Offsets to select the side that becomes the lower or upper bound. */
int near_x, near_y, near_z;
int far_x, far_y, far_z;
-
- if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
- if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
- if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+ qbvh_near_far_idx_calc(idir,
+ &near_x, &near_y, &near_z,
+ &far_x, &far_y, &far_z);
IsectPrecalc isect_precalc;
triangle_intersect_precalc(dir, &isect_precalc);
@@ -118,6 +117,10 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
if(UNLIKELY(node_dist > isect->t)
+#if BVH_FEATURE(BVH_MOTION)
+ || UNLIKELY(ray->time < inodes.y)
+ || UNLIKELY(ray->time > inodes.z)
+#endif
#ifdef __VISIBILITY_FLAG__
|| (__float_as_uint(inodes.x) & visibility) == 0)
#endif
@@ -132,7 +135,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
int child_mask;
ssef dist;
- BVH_DEBUG_NEXT_STEP();
+ BVH_DEBUG_NEXT_NODE();
#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
if(difl != 0.0f) {
@@ -327,7 +330,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
switch(type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
for(; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_STEP();
+ BVH_DEBUG_NEXT_INTERSECTION();
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(triangle_intersect(kg,
&isect_precalc,
@@ -348,7 +351,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
for(; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_STEP();
+ BVH_DEBUG_NEXT_INTERSECTION();
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(motion_triangle_intersect(kg,
isect,
@@ -372,8 +375,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
case PRIMITIVE_CURVE:
case PRIMITIVE_MOTION_CURVE: {
for(; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_STEP();
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
+ BVH_DEBUG_NEXT_INTERSECTION();
+ const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
+ kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
bool hit;
if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
hit = bvh_cardinal_curve_intersect(kg,
@@ -384,7 +388,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
object,
prim_addr,
ray->time,
- type,
+ curve_type,
lcg_state,
difl,
extmax);
@@ -398,7 +402,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
object,
prim_addr,
ray->time,
- type,
+ curve_type,
lcg_state,
difl,
extmax);
@@ -427,9 +431,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist);
# endif
- if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
- if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
- if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+ qbvh_near_far_idx_calc(idir,
+ &near_x, &near_y, &near_z,
+ &far_x, &far_y, &far_z);
tfar = ssef(isect->t);
# if BVH_FEATURE(BVH_HAIR)
dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
@@ -469,9 +473,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
# endif
- if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
- if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
- if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+ qbvh_near_far_idx_calc(idir,
+ &near_x, &near_y, &near_z,
+ &far_x, &far_y, &far_z);
tfar = ssef(isect->t);
# if BVH_FEATURE(BVH_HAIR)
dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
diff --git a/intern/cycles/kernel/bvh/qbvh_volume.h b/intern/cycles/kernel/bvh/qbvh_volume.h
index a97bf3c..1e77d8e 100644
--- a/intern/cycles/kernel/bvh/qbvh_volume.h
+++ b/intern/cycles/kernel/bvh/qbvh_volume.h
@@ -87,10 +87,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Offsets to select the side that becomes the lower or upper bound. */
int near_x, near_y, near_z;
int far_x, far_y, far_z;
-
- if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
- if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
- if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+ qbvh_near_far_idx_calc(idir,
+ &near_x, &near_y, &near_z,
+ &far_x, &far_y, &far_z);
IsectPrecalc isect_precalc;
triangle_intersect_precalc(dir, &isect_precalc);
@@ -100,8 +99,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
do {
/* Traverse internal nodes. */
while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
-#ifdef __VISIBILITY_FLAG__
float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+
+#ifdef __VISIBILITY_FLAG__
if((__float_as_uint(inodes.x) & visibility) == 0) {
/* Pop. */
node_addr = traversal_stack[stack_ptr].addr;
@@ -295,16 +295,15 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
int object_flag = kernel_tex_fetch(__object_flag, object);
if(object_flag & SD_OBJECT_HAS_VOLUME) {
-
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
# else
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
# endif
- if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
- if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
- if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+ qbvh_near_far_idx_calc(idir,
+ &near_x, &near_y, &near_z,
+ &far_x, &far_y, &far_z);
tfar = ssef(isect->t);
# if BVH_FEATURE(BVH_HAIR)
dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
@@ -348,9 +347,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
# endif
- if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
- if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
- if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+ qbvh_near_far_idx_calc(idir,
+ &near_x, &near_y, &near_z,
+ &far_x, &far_y, &far_z);
tfar = ssef(isect->t);
# if BVH_FEATURE(BVH_HAIR)
dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
diff --git a/intern/cycles/kernel/bvh/qbvh_volume_all.h b/intern/cycles/kernel/bvh/qbvh_volume_all.h
index e1ca27a..eb48af6 100644
--- a/intern/cycles/kernel/bvh/qbvh_volume_all.h
+++ b/intern/cycles/kernel/bvh/qbvh_volume_all.h
@@ -66,7 +66,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#ifndef __KERNEL_SSE41__
if(!isfinite(P.x)) {
- return false;
+ return 0;
}
#endif
@@ -91,10 +91,9 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Offsets to select the side that becomes the lower or upper bound. */
int near_x, near_y, near_z;
int far_x, far_y, far_z;
-
- if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
- if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
- if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+ qbvh_near_far_idx_calc(idir,
+ &near_x, &near_y, &near_z,
+ &far_x, &far_y, &far_z);
IsectPrecalc isect_precalc;
triangle_intersect_precalc(dir, &isect_precalc);
@@ -104,8 +103,9 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
do {
/* Traverse internal nodes. */
while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
-#ifdef __VISIBILITY_FLAG__
float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
+
+#ifdef __VISIBILITY_FLAG__
if((__float_as_uint(inodes.x) & visibility) == 0) {
/* Pop. */
node_addr = traversal_stack[stack_ptr].addr;
@@ -353,9 +353,9 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
# endif
- if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
- if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
- if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+ qbvh_near_far_idx_calc(idir,
+ &near_x, &near_y, &near_z,
+ &far_x, &far_y, &far_z);
tfar = ssef(isect_t);
idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
# if BVH_FEATURE(BVH_HAIR)
@@ -402,7 +402,6 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
# else
bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
# endif
- triangle_intersect_precalc(dir, &isect_precalc);
/* Scale isect->t to adjust for instancing. */
for(int i = 0; i < num_hits_in_instance; i++) {
(isect_array-i-1)->t *= t_fac;
@@ -415,12 +414,14 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
# else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
# endif
- triangle_intersect_precalc(dir, &isect_precalc);
}
- if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
- if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
- if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+ isect_t = tmax;
+ isect_array->t = isect_t;
+
+ qbvh_near_far_idx_calc(idir,
+ &near_x, &near_y, &near_z,
+ &far_x, &far_y, &far_z);
tfar = ssef(isect_t);
# if BVH_FEATURE(BVH_HAIR)
dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
@@ -435,8 +436,6 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
# endif
triangle_intersect_precalc(dir, &isect_precalc);
- isect_t = tmax;
- isect_array->t = isect_t;
object = OBJECT_NONE;
node_addr = traversal_stack[stack_ptr].addr;
diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
index 1cd8246..b6c896c 100644
--- a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
+++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
@@ -143,6 +143,7 @@ ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, float3 Ng,
{
const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
float3 N = bsdf->N;
+ int label = LABEL_REFLECT | LABEL_GLOSSY;
float NdotI = dot(N, I);
if(NdotI > 0.0f) {
@@ -211,6 +212,7 @@ ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, float3 Ng,
/* Some high number for MIS. */
*pdf = 1e6f;
*eval = make_float3(1e6f, 1e6f, 1e6f);
+ label = LABEL_REFLECT | LABEL_SINGULAR;
}
else {
/* leave the rest to eval_reflect */
@@ -224,7 +226,7 @@ ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, float3 Ng,
#endif
}
- return LABEL_REFLECT|LABEL_GLOSSY;
+ return label;
}
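The sampling functions in this and the following microfacet changes stop returning a hard-coded LABEL_REFLECT|LABEL_GLOSSY and instead track the label in a local variable, switching the GLOSSY bit to SINGULAR when the pdf is replaced by the large MIS constant (the near-mirror case). A small illustrative sketch of the resulting decision, with stand-in flag values since the real ones are defined in the kernel headers:

/* Stand-in values for illustration only; the kernel defines its own LABEL_* flags. */
enum {
	LABEL_REFLECT  = 1 << 0,
	LABEL_TRANSMIT = 1 << 1,
	LABEL_GLOSSY   = 1 << 2,
	LABEL_SINGULAR = 1 << 3,
};

static inline int sample_label(bool refractive, bool near_singular)
{
	if(near_singular) {
		/* pdf/eval were set to the 1e6f MIS constants: effectively a delta lobe. */
		return refractive ? (LABEL_TRANSMIT | LABEL_SINGULAR)
		                  : (LABEL_REFLECT  | LABEL_SINGULAR);
	}
	return refractive ? (LABEL_TRANSMIT | LABEL_GLOSSY)
	                  : (LABEL_REFLECT  | LABEL_GLOSSY);
}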
diff --git a/intern/cycles/kernel/closure/bsdf_hair.h b/intern/cycles/kernel/closure/bsdf_hair.h
index bede5f4..daaa26d 100644
--- a/intern/cycles/kernel/closure/bsdf_hair.h
+++ b/intern/cycles/kernel/closure/bsdf_hair.h
@@ -267,7 +267,10 @@ ccl_device int bsdf_hair_transmission_sample(const ShaderClosure *sc, float3 Ng,
*eval = make_float3(*pdf, *pdf, *pdf);
- kernel_assert(dot(locy, *omega_in) < 0.0f);
+ /* TODO(sergey): Should always be negative, but it seems some precision issue
+ * is involved here.
+ */
+ kernel_assert(dot(locy, *omega_in) < 1e-4f);
return LABEL_TRANSMIT|LABEL_GLOSSY;
}
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h
index 08e580f..4a1316f 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet.h
@@ -267,7 +267,7 @@ ccl_device bool bsdf_microfacet_merge(const ShaderClosure *a, const ShaderClosur
(isequal_float3(bsdf_a->T, bsdf_b->T)) &&
(bsdf_a->ior == bsdf_b->ior) &&
((!bsdf_a->extra && !bsdf_b->extra) ||
- ((bsdf_a->extra && bsdf_b->extra) &&
+ ((bsdf_a->extra && bsdf_b->extra) &&
(isequal_float3(bsdf_a->extra->color, bsdf_b->extra->color))));
}
@@ -452,6 +452,7 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
float alpha_y = bsdf->alpha_y;
bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
float3 N = bsdf->N;
+ int label;
float cosNO = dot(N, I);
if(cosNO > 0) {
@@ -477,6 +478,7 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
/* reflection or refraction? */
if(!m_refractive) {
float cosMO = dot(m, I);
+ label = LABEL_REFLECT | LABEL_GLOSSY;
if(cosMO > 0) {
/* eq. 39 - compute actual reflected direction */
@@ -487,6 +489,7 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
/* some high number for MIS */
*pdf = 1e6f;
*eval = make_float3(1e6f, 1e6f, 1e6f);
+ label = LABEL_REFLECT | LABEL_SINGULAR;
}
else {
/* microfacet normal is visible to this ray */
@@ -549,6 +552,8 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
}
}
else {
+ label = LABEL_TRANSMIT | LABEL_GLOSSY;
+
/* CAUTION: the i and o variables are inverted relative to the paper
* eq. 39 - compute actual refractive direction */
float3 R, T;
@@ -576,6 +581,7 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
/* some high number for MIS */
*pdf = 1e6f;
*eval = make_float3(1e6f, 1e6f, 1e6f);
+ label = LABEL_TRANSMIT | LABEL_SINGULAR;
}
else {
/* eq. 33 */
@@ -607,7 +613,10 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
}
}
}
- return (m_refractive) ? LABEL_TRANSMIT|LABEL_GLOSSY : LABEL_REFLECT|LABEL_GLOSSY;
+ else {
+ label = (m_refractive) ? LABEL_TRANSMIT|LABEL_GLOSSY : LABEL_REFLECT|LABEL_GLOSSY;
+ }
+ return label;
}
/* Beckmann microfacet with Smith shadow-masking from:
@@ -815,6 +824,7 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl
float alpha_y = bsdf->alpha_y;
bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
float3 N = bsdf->N;
+ int label;
float cosNO = dot(N, I);
if(cosNO > 0) {
@@ -839,6 +849,7 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl
/* reflection or refraction? */
if(!m_refractive) {
+ label = LABEL_REFLECT | LABEL_GLOSSY;
float cosMO = dot(m, I);
if(cosMO > 0) {
@@ -850,6 +861,7 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl
/* some high number for MIS */
*pdf = 1e6f;
*eval = make_float3(1e6f, 1e6f, 1e6f);
+ label = LABEL_REFLECT | LABEL_SINGULAR;
}
else {
/* microfacet normal is visible to this ray
@@ -904,6 +916,8 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl
}
}
else {
+ label = LABEL_TRANSMIT | LABEL_GLOSSY;
+
/* CAUTION: the i and o variables are inverted relative to the paper
* eq. 39 - compute actual refractive direction */
float3 R, T;
@@ -931,6 +945,7 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl
/* some high number for MIS */
*pdf = 1e6f;
*eval = make_float3(1e6f, 1e6f, 1e6f);
+ label = LABEL_TRANSMIT | LABEL_SINGULAR;
}
else {
/* eq. 33 */
@@ -963,7 +978,10 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl
}
}
}
- return (m_refractive) ? LABEL_TRANSMIT|LABEL_GLOSSY : LABEL_REFLECT|LABEL_GLOSSY;
+ else {
+ label = (m_refractive) ? LABEL_TRANSMIT|LABEL_GLOSSY : LABEL_REFLECT|LABEL_GLOSSY;
+ }
+ return label;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/geom/geom.h b/intern/cycles/kernel/geom/geom.h
index 24ced93..6838e26 100644
--- a/intern/cycles/kernel/geom/geom.h
+++ b/intern/cycles/kernel/geom/geom.h
@@ -23,6 +23,8 @@
#include "geom_subd_triangle.h"
#include "geom_triangle_intersect.h"
#include "geom_motion_triangle.h"
+#include "geom_motion_triangle_intersect.h"
+#include "geom_motion_triangle_shader.h"
#include "geom_motion_curve.h"
#include "geom_curve.h"
#include "geom_volume.h"
diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h
index 84aaaab..9de3354 100644
--- a/intern/cycles/kernel/geom/geom_curve.h
+++ b/intern/cycles/kernel/geom/geom_curve.h
@@ -255,6 +255,17 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
int ka = max(k0 - 1, v00.x);
int kb = min(k1 + 1, v00.x + v00.y - 1);
+#ifdef __KERNEL_AVX2__
+ avxf P_curve_0_1, P_curve_2_3;
+ if(type & PRIMITIVE_CURVE) {
+ P_curve_0_1 = _mm256_loadu2_m128(&kg->__curve_keys.data[k0].x, &kg->__curve_keys.data[ka].x);
+ P_curve_2_3 = _mm256_loadu2_m128(&kg->__curve_keys.data[kb].x, &kg->__curve_keys.data[k1].x);
+ }
+ else {
+ int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object;
+ motion_cardinal_curve_keys_avx(kg, fobject, prim, time, ka, k0, k1, kb, &P_curve_0_1,&P_curve_2_3);
+ }
+#else /* __KERNEL_AVX2__ */
ssef P_curve[4];
if(type & PRIMITIVE_CURVE) {
@@ -267,6 +278,7 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, curveAddr): object;
motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, (float4*)&P_curve);
}
+#endif /* __KERNEL_AVX2__ */
ssef rd_sgn = set_sign_bit<0, 1, 1, 1>(shuffle<0>(rd_ss));
ssef mul_zxxy = shuffle<2, 0, 0, 1>(vdir) * rd_sgn;
@@ -278,6 +290,33 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
ssef htfm1 = shuffle<1, 0, 1, 3>(load1f_first(extract<0>(d_ss)), vdir0);
ssef htfm2 = shuffle<1, 3, 2, 3>(mul_shuf, vdir0);
+#ifdef __KERNEL_AVX2__
+ const avxf vPP = _mm256_broadcast_ps(&P.m128);
+ const avxf htfm00 = avxf(htfm0.m128, htfm0.m128);
+ const avxf htfm11 = avxf(htfm1.m128, htfm1.m128);
+ const avxf htfm22 = avxf(htfm2.m128, htfm2.m128);
+
+ const avxf p01 = madd(shuffle<0>(P_curve_0_1 - vPP),
+ htfm00,
+ madd(shuffle<1>(P_curve_0_1 - vPP),
+ htfm11,
+ shuffle<2>(P_curve_0_1 - vPP) * htfm22));
+ const avxf p23 = madd(shuffle<0>(P_curve_2_3 - vPP),
+ htfm00,
+ madd(shuffle<1>(P_curve_2_3 - vPP),
+ htfm11,
+ shuffle<2>(P_curve_2_3 - vPP)*htfm22));
+
+ const ssef p0 = _mm256_castps256_ps128(p01);
+ const ssef p1 = _mm256_extractf128_ps(p01, 1);
+ const ssef p2 = _mm256_castps256_ps128(p23);
+ const ssef p3 = _mm256_extractf128_ps(p23, 1);
+
+ const ssef P_curve_1 = _mm256_extractf128_ps(P_curve_0_1, 1);
+ r_st = ((float4 &)P_curve_1).w;
+ const ssef P_curve_2 = _mm256_castps256_ps128(P_curve_2_3);
+ r_en = ((float4 &)P_curve_2).w;
+#else /* __KERNEL_AVX2__ */
ssef htfm[] = { htfm0, htfm1, htfm2 };
ssef vP = load4f(P);
ssef p0 = transform_point_T3(htfm, P_curve[0] - vP);
@@ -285,6 +324,10 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
ssef p2 = transform_point_T3(htfm, P_curve[2] - vP);
ssef p3 = transform_point_T3(htfm, P_curve[3] - vP);
+ r_st = ((float4 &)P_curve[1]).w;
+ r_en = ((float4 &)P_curve[2]).w;
+#endif /* __KERNEL_AVX2__ */
+
float fc = 0.71f;
ssef vfc = ssef(fc);
ssef vfcxp3 = vfc * p3;
@@ -294,8 +337,6 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
vcurve_coef[2] = madd(ssef(fc * 2.0f), p0, madd(ssef(fc - 3.0f), p1, msub(ssef(3.0f - 2.0f * fc), p2, vfcxp3)));
vcurve_coef[3] = msub(ssef(fc - 2.0f), p2 - p1, msub(vfc, p0, vfcxp3));
- r_st = ((float4 &)P_curve[1]).w;
- r_en = ((float4 &)P_curve[2]).w;
}
#else
float3 curve_coef[4];
@@ -383,8 +424,9 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
/* begin loop */
while(!(tree >> (depth))) {
- float i_st = tree * resol;
- float i_en = i_st + (level * resol);
+ const float i_st = tree * resol;
+ const float i_en = i_st + (level * resol);
+
#ifdef __KERNEL_SSE2__
ssef vi_st = ssef(i_st), vi_en = ssef(i_en);
ssef vp_st = madd(madd(madd(vcurve_coef[3], vi_st, vcurve_coef[2]), vi_st, vcurve_coef[1]), vi_st, vcurve_coef[0]);
@@ -458,13 +500,23 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
if(flags & CURVE_KN_RIBBONS) {
float3 tg = (p_en - p_st);
+#ifdef __KERNEL_SSE__
+ const float3 tg_sq = tg * tg;
+ float w = tg_sq.x + tg_sq.y;
+#else
float w = tg.x * tg.x + tg.y * tg.y;
+#endif
if(w == 0) {
tree++;
level = tree & -tree;
continue;
}
+#ifdef __KERNEL_SSE__
+ const float3 p_sttg = p_st * tg;
+ w = -(p_sttg.x + p_sttg.y) / w;
+#else
w = -(p_st.x * tg.x + p_st.y * tg.y) / w;
+#endif
w = saturate(w);
/* compute u on the curve segment */
@@ -496,7 +548,13 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
if(difl != 0.0f) {
mw_extension = min(difl * fabsf(bmaxz), extmax);
r_ext = mw_extension + r_curr;
+#ifdef __KERNEL_SSE__
+ const float3 p_curr_sq = p_curr * p_curr;
+ const float3 dxxx = _mm_sqrt_ss(_mm_hadd_ps(p_curr_sq.m128, p_curr_sq.m128));
+ float d = dxxx.x;
+#else
float d = sqrtf(p_curr.x * p_curr.x + p_curr.y * p_curr.y);
+#endif
float d0 = d - r_curr;
float d1 = d + r_curr;
float inv_mw_extension = 1.0f/mw_extension;
@@ -853,7 +911,7 @@ ccl_device_forceinline bool bvh_curve_intersect(KernelGlobals *kg, Intersection
# undef len3_squared
# undef len3
# undef dot3
-# endif
+#endif
}
ccl_device_inline float3 curvetangent(float t, float3 p0, float3 p1, float3 p2, float3 p3)
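In the AVX2 branch added above, the four cardinal-curve control keys are fetched as two 256-bit registers, each holding a pair of float4 keys, so the hull-space transform (the p01/p23 madd chain) handles two points per instruction. A minimal sketch of the packing, assuming keys stored contiguously as four floats each; the diff calls _mm256_loadu2_m128 directly, and the composition below is the portable equivalent:

#include <immintrin.h>

/* Pack two unaligned float4 keys (indices a and b into a flat float array,
 * four floats per key) into one __m256: low lane = key a, high lane = key b. */
static inline __m256 pack_two_keys(const float *keys, int a, int b)
{
	const __m128 lo = _mm_loadu_ps(keys + 4*a);
	const __m128 hi = _mm_loadu_ps(keys + 4*b);
	return _mm256_insertf128_ps(_mm256_castps128_ps256(lo), hi, 1);
}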
diff --git a/intern/cycles/kernel/geom/geom_motion_curve.h b/intern/cycles/kernel/geom/geom_motion_curve.h
index 6de5aa7..dc1388b 100644
--- a/intern/cycles/kernel/geom/geom_motion_curve.h
+++ b/intern/cycles/kernel/geom/geom_motion_curve.h
@@ -50,12 +50,12 @@ ccl_device_inline int find_attribute_curve_motion(KernelGlobals *kg, int object,
ccl_device_inline void motion_curve_keys_for_step(KernelGlobals *kg, int offset, int numkeys, int numsteps, int step, int k0, int k1, float4 keys[2])
{
if(step == numsteps) {
- /* center step: regular vertex location */
+ /* center step: regular key location */
keys[0] = kernel_tex_fetch(__curve_keys, k0);
keys[1] = kernel_tex_fetch(__curve_keys, k1);
}
else {
- /* center step not stored in this array */
+ /* center step is not stored in this array */
if(step > numsteps)
step--;
@@ -97,14 +97,14 @@ ccl_device_inline void motion_curve_keys(KernelGlobals *kg, int object, int prim
ccl_device_inline void motion_cardinal_curve_keys_for_step(KernelGlobals *kg, int offset, int numkeys, int numsteps, int step, int k0, int k1, int k2, int k3, float4 keys[4])
{
if(step == numsteps) {
- /* center step: regular vertex location */
+ /* center step: regular key location */
keys[0] = kernel_tex_fetch(__curve_keys, k0);
keys[1] = kernel_tex_fetch(__curve_keys, k1);
keys[2] = kernel_tex_fetch(__curve_keys, k2);
keys[3] = kernel_tex_fetch(__curve_keys, k3);
}
else {
- /* center step not store in this array */
+ /* center step is not stored in this array */
if(step > numsteps)
step--;
@@ -118,7 +118,12 @@ ccl_device_inline void motion_cardinal_curve_keys_for_step(KernelGlobals *kg, in
}
/* return 2 curve key locations */
-ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg, int object, int prim, float time, int k0, int k1, int k2, int k3, float4 keys[4])
+ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg,
+ int object,
+ int prim,
+ float time,
+ int k0, int k1, int k2, int k3,
+ float4 keys[4])
{
/* get motion info */
int numsteps, numkeys;
@@ -147,6 +152,65 @@ ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg, int object,
keys[3] = (1.0f - t)*keys[3] + t*next_keys[3];
}
+#ifdef __KERNEL_AVX2__
+/* Similar to above, but returns keys as a pair of AVX registers, each
+ * holding two float4 values.
+ */
+ccl_device_inline void motion_cardinal_curve_keys_avx(KernelGlobals *kg,
+ int object,
+ int prim,
+ float time,
+ int k0, int k1,
+ int k2, int k3,
+ avxf *out_keys_0_1,
+ avxf *out_keys_2_3)
+{
+ /* Get motion info. */
+ int numsteps, numkeys;
+ object_motion_info(kg, object, &numsteps, NULL, &numkeys);
+
+ /* Figure out which steps we need to fetch and their interpolation factor. */
+ int maxstep = numsteps * 2;
+ int step = min((int)(time*maxstep), maxstep - 1);
+ float t = time*maxstep - step;
+
+ /* Find attribute. */
+ AttributeElement elem;
+ int offset = find_attribute_curve_motion(kg,
+ object,
+ ATTR_STD_MOTION_VERTEX_POSITION,
+ &elem);
+ kernel_assert(offset != ATTR_STD_NOT_FOUND);
+
+ /* Fetch key coordinates. */
+ float4 next_keys[4];
+ float4 keys[4];
+ motion_cardinal_curve_keys_for_step(kg,
+ offset,
+ numkeys,
+ numsteps,
+ step,
+ k0, k1, k2, k3,
+ keys);
+ motion_cardinal_curve_keys_for_step(kg,
+ offset,
+ numkeys,
+ numsteps,
+ step + 1,
+ k0, k1, k2, k3,
+ next_keys);
+
+ const avxf keys_0_1 = avxf(keys[0].m128, keys[1].m128);
+ const avxf keys_2_3 = avxf(keys[2].m128, keys[3].m128);
+ const avxf next_keys_0_1 = avxf(next_keys[0].m128, next_keys[1].m128);
+ const avxf next_keys_2_3 = avxf(next_keys[2].m128, next_keys[3].m128);
+
+ /* Interpolate between steps. */
+ *out_keys_0_1 = (1.0f - t) * keys_0_1 + t*next_keys_0_1;
+ *out_keys_2_3 = (1.0f - t) * keys_2_3 + t*next_keys_2_3;
+}
+#endif
+
#endif
CCL_NAMESPACE_END
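motion_cardinal_curve_keys_avx() uses the same step selection as the scalar code above: with numsteps steps on each side of the frame center, the ray time in [0,1) maps onto 2*numsteps segments, and the result is a linear blend of the two surrounding steps. A self-contained sketch of that selection and blend (simplified to a flat array of 2*numsteps+1 scalar values; the kernel stores the center step separately, as the *_for_step() helpers show):

#include <algorithm>
#include <cstdio>

/* Interpolate one attribute stored at (2*numsteps + 1) motion steps,
 * mirroring the step/t computation in motion_cardinal_curve_keys(). */
static float motion_interp(const float *step_values, int numsteps, float time)
{
	const int maxstep = numsteps*2;
	const int step = std::min((int)(time*maxstep), maxstep - 1);
	const float t = time*maxstep - step;
	return (1.0f - t)*step_values[step] + t*step_values[step + 1];
}

int main()
{
	/* Two motion steps on each side of the center: five stored values. */
	const float values[5] = {0.0f, 1.0f, 4.0f, 9.0f, 16.0f};
	printf("%f\n", motion_interp(values, 2, 0.6f));  /* blends values[2] and values[3] */
	return 0;
}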
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h
index 3cbe59a..4e84aa9 100644
--- a/intern/cycles/kernel/geom/geom_motion_triangle.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle.h
@@ -76,7 +76,7 @@ ccl_device_inline void motion_triangle_normals_for_step(KernelGlobals *kg, uint4
normals[2] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z));
}
else {
- /* center step not stored in this array */
+ /* center step is not stored in this array */
if(step > numsteps)
step--;
@@ -117,312 +117,4 @@ ccl_device_inline void motion_triangle_vertices(KernelGlobals *kg, int object, i
verts[2] = (1.0f - t)*verts[2] + t*next_verts[2];
}
-/* Refine triangle intersection to more precise hit point. For rays that travel
- * far the precision is often not so good, this reintersects the primitive from
- * a closer distance. */
-
-ccl_device_inline float3 motion_triangle_refine(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, float3 verts[3])
-{
- float3 P = ray->P;
- float3 D = ray->D;
- float t = isect->t;
-
-#ifdef __INTERSECTION_REFINE__
- if(isect->object != OBJECT_NONE) {
- if(UNLIKELY(t == 0.0f)) {
- return P;
- }
-# ifdef __OBJECT_MOTION__
- Transform tfm = ccl_fetch(sd, ob_itfm);
-# else
- Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
-# endif
-
- P = transform_point(&tfm, P);
- D = transform_direction(&tfm, D*t);
- D = normalize_len(D, &t);
- }
-
- P = P + D*t;
-
- /* compute refined intersection distance */
- const float3 e1 = verts[0] - verts[2];
- const float3 e2 = verts[1] - verts[2];
- const float3 s1 = cross(D, e2);
-
- const float invdivisor = 1.0f/dot(s1, e1);
- const float3 d = P - verts[2];
- const float3 s2 = cross(d, e1);
- float rt = dot(e2, s2)*invdivisor;
-
- /* compute refined position */
- P = P + D*rt;
-
- if(isect->object != OBJECT_NONE) {
-# ifdef __OBJECT_MOTION__
- Transform tfm = ccl_fetch(sd, ob_tfm);
-# else
- Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
-# endif
-
- P = transform_point(&tfm, P);
- }
-
- return P;
-#else
- return P + D*t;
-#endif
-}
-
-/* Same as above, except that isect->t is assumed to be in object space for instancing */
-
-#ifdef __SUBSURFACE__
-# if defined(__KERNEL_CUDA__) && (defined(i386) || defined(_M_IX86))
-ccl_device_noinline
-# else
-ccl_device_inline
-# endif
-float3 motion_triangle_refine_subsurface(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, float3 verts[3])
-{
- float3 P = ray->P;
- float3 D = ray->D;
- float t = isect->t;
-
-# ifdef __INTERSECTION_REFINE__
- if(isect->object != OBJECT_NONE) {
-# ifdef __OBJECT_MOTION__
- Transform tfm = ccl_fetch(sd, ob_itfm);
-# else
- Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
-# endif
-
- P = transform_point(&tfm, P);
- D = transform_direction(&tfm, D);
- D = normalize(D);
- }
-
- P = P + D*t;
-
- /* compute refined intersection distance */
- const float3 e1 = verts[0] - verts[2];
- const float3 e2 = verts[1] - verts[2];
- const float3 s1 = cross(D, e2);
-
- const float invdivisor = 1.0f/dot(s1, e1);
- const float3 d = P - verts[2];
- const float3 s2 = cross(d, e1);
- float rt = dot(e2, s2)*invdivisor;
-
- P = P + D*rt;
-
- if(isect->object != OBJECT_NONE) {
-# ifdef __OBJECT_MOTION__
- Transform tfm = ccl_fetch(sd, ob_tfm);
-# else
- Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
-# endif
-
- P = transform_point(&tfm, P);
- }
-
- return P;
-# else
- return P + D*t;
-# endif
-}
-#endif
-
-/* Setup of motion triangle specific parts of ShaderData, moved into this one
- * function to more easily share computation of interpolated positions and
- * normals */
-
-/* return 3 triangle vertex normals */
-ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, bool subsurface)
-{
- /* get shader */
- ccl_fetch(sd, shader) = kernel_tex_fetch(__tri_shader, ccl_fetch(sd, prim));
-
- /* get motion info */
- int numsteps, numverts;
- object_motion_info(kg, ccl_fetch(sd, object), &numsteps, &numverts, NULL);
-
- /* figure out which steps we need to fetch and their interpolation factor */
- int maxstep = numsteps*2;
- int step = min((int)(ccl_fetch(sd, time)*maxstep), maxstep-1);
- float t = ccl_fetch(sd, time)*maxstep - step;
-
- /* find attribute */
- AttributeElement elem;
- int offset = find_attribute_motion(kg, ccl_fetch(sd, object), ATTR_STD_MOTION_VERTEX_POSITION, &elem);
- kernel_assert(offset != ATTR_STD_NOT_FOUND);
-
- /* fetch vertex coordinates */
- float3 verts[3], next_verts[3];
- uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim));
-
- motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts);
- motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_verts);
-
- /* interpolate between steps */
- verts[0] = (1.0f - t)*verts[0] + t*next_verts[0];
- verts[1] = (1.0f - t)*verts[1] + t*next_verts[1];
- verts[2] = (1.0f - t)*verts[2] + t*next_verts[2];
-
- /* compute refined position */
-#ifdef __SUBSURFACE__
- if(!subsurface)
-#endif
- ccl_fetch(sd, P) = motion_triangle_refine(kg, sd, isect, ray, verts);
-#ifdef __SUBSURFACE__
- else
- ccl_fetch(sd, P) = motion_triangle_refine_subsurface(kg, sd, isect, ray, verts);
-#endif
-
- /* compute face normal */
- float3 Ng;
- if(ccl_fetch(sd, flag) & SD_NEGATIVE_SCALE_APPLIED)
- Ng = normalize(cross(verts[2] - verts[0], verts[1] - verts[0]));
- else
- Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0]));
-
- ccl_fetch(sd, Ng) = Ng;
- ccl_fetch(sd, N) = Ng;
-
- /* compute derivatives of P w.r.t. uv */
-#ifdef __DPDU__
- ccl_fetch(sd, dPdu) = (verts[0] - verts[2]);
- ccl_fetch(sd, dPdv) = (verts[1] - verts[2]);
-#endif
-
- /* compute smooth normal */
- if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL) {
- /* find attribute */
- AttributeElement elem;
- int offset = find_attribute_motion(kg, ccl_fetch(sd, object), ATTR_STD_MOTION_VERTEX_NORMAL, &elem);
- kernel_assert(offset != ATTR_STD_NOT_FOUND);
-
- /* fetch vertex coordinates */
- float3 normals[3], next_normals[3];
- motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals);
- motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_normals);
-
- /* interpolate between steps */
- normals[0] = (1.0f - t)*normals[0] + t*next_normals[0];
- normals[1] = (1.0f - t)*normals[1] + t*next_normals[1];
- normals[2] = (1.0f - t)*normals[2] + t*next_normals[2];
-
- /* interpolate between vertices */
- float u = ccl_fetch(sd, u);
- float v = ccl_fetch(sd, v);
- float w = 1.0f - u - v;
- ccl_fetch(sd, N) = (u*normals[0] + v*normals[1] + w*normals[2]);
- }
-}
-
-/* Ray intersection. We simply compute the vertex positions at the given ray
- * time and do a ray intersection with the resulting triangle */
-
-ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg, Intersection *isect,
- float3 P, float3 dir, float time, uint visibility, int object, int triAddr)
-{
- /* primitive index for vertex location lookup */
- int prim = kernel_tex_fetch(__prim_index, triAddr);
- int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, triAddr): object;
-
- /* get vertex locations for intersection */
- float3 verts[3];
- motion_triangle_vertices(kg, fobject, prim, time, verts);
-
- /* ray-triangle intersection, unoptimized */
- float t, u, v;
-
- if(ray_triangle_intersect_uv(P, dir, isect->t, verts[2], verts[0], verts[1], &u, &v, &t)) {
-#ifdef __VISIBILITY_FLAG__
- /* visibility flag test. we do it here under the assumption
- * that most triangles are culled by node flags */
- if(kernel_tex_fetch(__prim_visibility, triAddr) & visibility)
-#endif
- {
- isect->t = t;
- isect->u = u;
- isect->v = v;
- isect->prim = triAddr;
- isect->object = object;
- isect->type = PRIMITIVE_MOTION_TRIANGLE;
-
- return true;
- }
- }
-
- return false;
-}
-
-/* Special ray intersection routines for subsurface scattering. In that case we
- * only want to intersect with primitives in the same object, and if case of
- * multiple hits we pick a single random primitive as the intersection point. */
-
-#ifdef __SUBSURFACE__
-ccl_device_inline void motion_triangle_intersect_subsurface(
- KernelGlobals *kg,
- SubsurfaceIntersection *ss_isect,
- float3 P,
- float3 dir,
- float time,
- int object,
- int triAddr,
- float tmax,
- uint *lcg_state,
- int max_hits)
-{
- /* primitive index for vertex location lookup */
- int prim = kernel_tex_fetch(__prim_index, triAddr);
- int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, triAddr): object;
-
- /* get vertex locations for intersection */
- float3 verts[3];
- motion_triangle_vertices(kg, fobject, prim, time, verts);
-
- /* ray-triangle intersection, unoptimized */
- float t, u, v;
-
- if(ray_triangle_intersect_uv(P, dir, tmax, verts[2], verts[0], verts[1], &u, &v, &t)) {
- for(int i = min(max_hits, ss_isect->num_hits) - 1; i >= 0; --i) {
- if(ss_isect->hits[i].t == t) {
- return;
- }
- }
-
- ss_isect->num_hits++;
-
- int hit;
-
- if(ss_isect->num_hits <= max_hits) {
- hit = ss_isect->num_hits - 1;
- }
- else {
- /* reservoir sampling: if we are at the maximum number of
- * hits, randomly replace element or skip it */
- hit = lcg_step_uint(lcg_state) % ss_isect->num_hits;
-
- if(hit >= max_hits)
- return;
- }
-
- /* record intersection */
- Intersection *isect = &ss_isect->hits[hit];
- isect->t = t;
- isect->u = u;
- isect->v = v;
- isect->prim = triAddr;
- isect->object = object;
- isect->type = PRIMITIVE_MOTION_TRIANGLE;
-
- /* Record geometric normal. */
- ss_isect->Ng[hit] = normalize(cross(verts[1] - verts[0],
- verts[2] - verts[0]));
- }
-}
-#endif
-
CCL_NAMESPACE_END
-
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
new file mode 100644
index 0000000..d57d74e
--- /dev/null
+++ b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
@@ -0,0 +1,280 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Motion Triangle Primitive
+ *
+ * These are stored as regular triangles, plus extra positions and normals at
+ * times other than the frame center. Computing the triangle vertex positions
+ * or normals at a given ray time is a matter of interpolation of the two steps
+ * between which the ray time lies.
+ *
+ * The extra positions and normals are stored as ATTR_STD_MOTION_VERTEX_POSITION
+ * and ATTR_STD_MOTION_VERTEX_NORMAL mesh attributes.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+/* Refine triangle intersection to more precise hit point. For rays that travel
+ * far the precision is often not so good, this reintersects the primitive from
+ * a closer distance.
+ */
+
+ccl_device_inline float3 motion_triangle_refine(KernelGlobals *kg,
+ ShaderData *sd,
+ const Intersection *isect,
+ const Ray *ray,
+ float3 verts[3])
+{
+ float3 P = ray->P;
+ float3 D = ray->D;
+ float t = isect->t;
+
+#ifdef __INTERSECTION_REFINE__
+ if(isect->object != OBJECT_NONE) {
+ if(UNLIKELY(t == 0.0f)) {
+ return P;
+ }
+# ifdef __OBJECT_MOTION__
+ Transform tfm = ccl_fetch(sd, ob_itfm);
+# else
+ Transform tfm = object_fetch_transform(kg,
+ isect->object,
+ OBJECT_INVERSE_TRANSFORM);
+# endif
+
+ P = transform_point(&tfm, P);
+ D = transform_direction(&tfm, D*t);
+ D = normalize_len(D, &t);
+ }
+
+ P = P + D*t;
+
+ /* Compute refined intersection distance. */
+ const float3 e1 = verts[0] - verts[2];
+ const float3 e2 = verts[1] - verts[2];
+ const float3 s1 = cross(D, e2);
+
+ const float invdivisor = 1.0f/dot(s1, e1);
+ const float3 d = P - verts[2];
+ const float3 s2 = cross(d, e1);
+ float rt = dot(e2, s2)*invdivisor;
+
+ /* Compute refined position. */
+ P = P + D*rt;
+
+ if(isect->object != OBJECT_NONE) {
+# ifdef __OBJECT_MOTION__
+ Transform tfm = ccl_fetch(sd, ob_tfm);
+# else
+ Transform tfm = object_fetch_transform(kg,
+ isect->object,
+ OBJECT_TRANSFORM);
+# endif
+
+ P = transform_point(&tfm, P);
+ }
+
+ return P;
+#else
+ return P + D*t;
+#endif
+}
+
+/* Same as above, except that isect->t is assumed to be in object space
+ * for instancing.
+ */
+
+#ifdef __SUBSURFACE__
+# if defined(__KERNEL_CUDA__) && (defined(i386) || defined(_M_IX86))
+ccl_device_noinline
+# else
+ccl_device_inline
+# endif
+float3 motion_triangle_refine_subsurface(KernelGlobals *kg,
+ ShaderData *sd,
+ const Intersection *isect,
+ const Ray *ray,
+ float3 verts[3])
+{
+ float3 P = ray->P;
+ float3 D = ray->D;
+ float t = isect->t;
+
+# ifdef __INTERSECTION_REFINE__
+ if(isect->object != OBJECT_NONE) {
+# ifdef __OBJECT_MOTION__
+ Transform tfm = ccl_fetch(sd, ob_itfm);
+# else
+ Transform tfm = object_fetch_transform(kg,
+ isect->object,
+ OBJECT_INVERSE_TRANSFORM);
+# endif
+
+ P = transform_point(&tfm, P);
+ D = transform_direction(&tfm, D);
+ D = normalize(D);
+ }
+
+ P = P + D*t;
+
+ /* compute refined intersection distance */
+ const float3 e1 = verts[0] - verts[2];
+ const float3 e2 = verts[1] - verts[2];
+ const float3 s1 = cross(D, e2);
+
+ const float invdivisor = 1.0f/dot(s1, e1);
+ const float3 d = P - verts[2];
+ const float3 s2 = cross(d, e1);
+ float rt = dot(e2, s2)*invdivisor;
+
+ P = P + D*rt;
+
+ if(isect->object != OBJECT_NONE) {
+# ifdef __OBJECT_MOTION__
+ Transform tfm = ccl_fetch(sd, ob_tfm);
+# else
+ Transform tfm = object_fetch_transform(kg,
+ isect->object,
+ OBJECT_TRANSFORM);
+# endif
+
+ P = transform_point(&tfm, P);
+ }
+
+ return P;
+# else /* __INTERSECTION_REFINE__ */
+ return P + D*t;
+# endif /* __INTERSECTION_REFINE__ */
+}
+#endif /* __SUBSURFACE__ */
+
+
+/* Ray intersection. We simply compute the vertex positions at the given ray
+ * time and do a ray intersection with the resulting triangle.
+ */
+
+ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg,
+ Intersection *isect,
+ float3 P,
+ float3 dir,
+ float time,
+ uint visibility,
+ int object,
+ int prim_addr)
+{
+ /* Primitive index for vertex location lookup. */
+ int prim = kernel_tex_fetch(__prim_index, prim_addr);
+ int fobject = (object == OBJECT_NONE)
+ ? kernel_tex_fetch(__prim_object, prim_addr)
+ : object;
+ /* Get vertex locations for intersection. */
+ float3 verts[3];
+ motion_triangle_vertices(kg, fobject, prim, time, verts);
+ /* Ray-triangle intersection, unoptimized. */
+ float t, u, v;
+ if(ray_triangle_intersect_uv(P,
+ dir,
+ isect->t,
+ verts[2], verts[0], verts[1],
+ &u, &v, &t))
+ {
+#ifdef __VISIBILITY_FLAG__
+ /* Visibility flag test. We do it here under the assumption
+ * that most triangles are culled by node flags.
+ */
+ if(kernel_tex_fetch(__prim_visibility, prim_addr) & visibility)
+#endif
+ {
+ isect->t = t;
+ isect->u = u;
+ isect->v = v;
+ isect->prim = prim_addr;
+ isect->object = object;
+ isect->type = PRIMITIVE_MOTION_TRIANGLE;
+ return true;
+ }
+ }
+ return false;
+}
+
+/* Special ray intersection routines for subsurface scattering. In that case we
+ * only want to intersect with primitives in the same object, and in case of
+ * multiple hits we pick a single random primitive as the intersection point.
+ */
+#ifdef __SUBSURFACE__
+ccl_device_inline void motion_triangle_intersect_subsurface(
+ KernelGlobals *kg,
+ SubsurfaceIntersection *ss_isect,
+ float3 P,
+ float3 dir,
+ float time,
+ int object,
+ int prim_addr,
+ float tmax,
+ uint *lcg_state,
+ int max_hits)
+{
+ /* Primitive index for vertex location lookup. */
+ int prim = kernel_tex_fetch(__prim_index, prim_addr);
+ int fobject = (object == OBJECT_NONE)
+ ? kernel_tex_fetch(__prim_object, prim_addr)
+ : object;
+ /* Get vertex locations for intersection. */
+ float3 verts[3];
+ motion_triangle_vertices(kg, fobject, prim, time, verts);
+ /* Ray-triangle intersection, unoptimized. */
+ float t, u, v;
+ if(ray_triangle_intersect_uv(P,
+ dir,
+ tmax,
+ verts[2], verts[0], verts[1],
+ &u, &v, &t))
+ {
+ for(int i = min(max_hits, ss_isect->num_hits) - 1; i >= 0; --i) {
+ if(ss_isect->hits[i].t == t) {
+ return;
+ }
+ }
+ ss_isect->num_hits++;
+ int hit;
+ if(ss_isect->num_hits <= max_hits) {
+ hit = ss_isect->num_hits - 1;
+ }
+ else {
+ /* Reservoir sampling: if we are at the maximum number of
+ * hits, randomly replace element or skip it.
+ */
+ hit = lcg_step_uint(lcg_state) % ss_isect->num_hits;
+
+ if(hit >= max_hits)
+ return;
+ }
+ /* Record intersection. */
+ Intersection *isect = &ss_isect->hits[hit];
+ isect->t = t;
+ isect->u = u;
+ isect->v = v;
+ isect->prim = prim_addr;
+ isect->object = object;
+ isect->type = PRIMITIVE_MOTION_TRIANGLE;
+ /* Record geometric normal. */
+ ss_isect->Ng[hit] = normalize(cross(verts[1] - verts[0],
+ verts[2] - verts[0]));
+ }
+}
+#endif /* __SUBSURFACE__ */
+
+CCL_NAMESPACE_END
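The subsurface routine above keeps at most max_hits intersections; once the buffer is full it falls back to reservoir sampling, so every hit seen so far is retained with equal probability max_hits/num_hits. A standalone sketch of that policy (plain C++ with rand(); the kernel uses its per-sample LCG instead):

#include <cstdlib>
#include <vector>

/* Keep a uniform random sample of at most max_hits values from a stream,
 * mirroring the "randomly replace element or skip it" logic above. */
struct HitReservoir {
	std::vector<float> hits;
	int num_seen;
	int max_hits;

	explicit HitReservoir(int max_hits_) : num_seen(0), max_hits(max_hits_) {}

	void record(float t)
	{
		num_seen++;
		if((int)hits.size() < max_hits) {
			hits.push_back(t);
		}
		else {
			/* Slot is uniform in [0, num_seen); it lands inside the buffer
			 * with probability max_hits/num_seen, otherwise the hit is dropped. */
			const int slot = rand() % num_seen;
			if(slot < max_hits)
				hits[slot] = t;
		}
	}
};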
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_shader.h b/intern/cycles/kernel/geom/geom_motion_triangle_shader.h
new file mode 100644
index 0000000..c5dbc6a
--- /dev/null
+++ b/intern/cycles/kernel/geom/geom_motion_triangle_shader.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Motion Triangle Primitive
+ *
+ * These are stored as regular triangles, plus extra positions and normals at
+ * times other than the frame center. Computing the triangle vertex positions
+ * or normals at a given ray time is a matter of interpolation of the two steps
+ * between which the ray time lies.
+ *
+ * The extra positions and normals are stored as ATTR_STD_MOTION_VERTEX_POSITION
+ * and ATTR_STD_MOTION_VERTEX_NORMAL mesh attributes.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+/* Setup of motion triangle specific parts of ShaderData, moved into this one
+ * function to more easily share computation of interpolated positions and
+ * normals */
+
+/* return 3 triangle vertex normals */
+ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg,
+ ShaderData *sd,
+ const Intersection *isect,
+ const Ray *ray,
+ bool subsurface)
+{
+ /* Get shader. */
+ ccl_fetch(sd, shader) = kernel_tex_fetch(__tri_shader, ccl_fetch(sd, prim));
+ /* Get motion info. */
+ /* TODO(sergey): This logic is really similar to motion_triangle_vertices(),
+ * can we de-duplicate something here?
+ */
+ int numsteps, numverts;
+ object_motion_info(kg, ccl_fetch(sd, object), &numsteps, &numverts, NULL);
+ /* Figure out which steps we need to fetch and their interpolation factor. */
+ int maxstep = numsteps*2;
+ int step = min((int)(ccl_fetch(sd, time)*maxstep), maxstep-1);
+ float t = ccl_fetch(sd, time)*maxstep - step;
+ /* Find attribute. */
+ AttributeElement elem;
+ int offset = find_attribute_motion(kg, ccl_fetch(sd, object),
+ ATTR_STD_MOTION_VERTEX_POSITION,
+ &elem);
+ kernel_assert(offset != ATTR_STD_NOT_FOUND);
+ /* Fetch vertex coordinates. */
+ float3 verts[3], next_verts[3];
+ uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim));
+ motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts);
+ motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_verts);
+ /* Interpolate between steps. */
+ verts[0] = (1.0f - t)*verts[0] + t*next_verts[0];
+ verts[1] = (1.0f - t)*verts[1] + t*next_verts[1];
+ verts[2] = (1.0f - t)*verts[2] + t*next_verts[2];
+ /* Compute refined position. */
+#ifdef __SUBSURFACE__
+ if(subsurface) {
+ ccl_fetch(sd, P) = motion_triangle_refine_subsurface(kg,
+ sd,
+ isect,
+ ray,
+ verts);
+ }
+ else
+#endif /* __SUBSURFACE__*/
+ {
+ ccl_fetch(sd, P) = motion_triangle_refine(kg, sd, isect, ray, verts);
+ }
+ /* Compute face normal. */
+ float3 Ng;
+ if(ccl_fetch(sd, flag) & SD_NEGATIVE_SCALE_APPLIED) {
+ Ng = normalize(cross(verts[2] - verts[0], verts[1] - verts[0]));
+ }
+ else {
+ Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0]));
+ }
+ ccl_fetch(sd, Ng) = Ng;
+ ccl_fetch(sd, N) = Ng;
+ /* Compute derivatives of P w.r.t. uv. */
+#ifdef __DPDU__
+ ccl_fetch(sd, dPdu) = (verts[0] - verts[2]);
+ ccl_fetch(sd, dPdv) = (verts[1] - verts[2]);
+#endif
+ /* Compute smooth normal. */
+ if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL) {
+ /* Find attribute. */
+ AttributeElement elem;
+ int offset = find_attribute_motion(kg,
+ ccl_fetch(sd, object),
+ ATTR_STD_MOTION_VERTEX_NORMAL,
+ &elem);
+ kernel_assert(offset != ATTR_STD_NOT_FOUND);
+ /* Fetch vertex coordinates. */
+ float3 normals[3], next_normals[3];
+ motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals);
+ motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_normals);
+ /* Interpolate between steps. */
+ normals[0] = (1.0f - t)*normals[0] + t*next_normals[0];
+ normals[1] = (1.0f - t)*normals[1] + t*next_normals[1];
+ normals[2] = (1.0f - t)*normals[2] + t*next_normals[2];
+ /* Interpolate between vertices. */
+ float u = ccl_fetch(sd, u);
+ float v = ccl_fetch(sd, v);
+ float w = 1.0f - u - v;
+ ccl_fetch(sd, N) = (u*normals[0] + v*normals[1] + w*normals[2]);
+ }
+}
+
+CCL_NAMESPACE_END
+
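The smooth-normal branch at the end of motion_triangle_shader_setup() blends the three time-interpolated vertex normals with the barycentric coordinates of the hit, using w = 1 - u - v for the third corner. A small sketch of that blend, with a plain struct standing in for the kernel's float3:

struct float3 { float x, y, z; };

static inline float3 operator*(float s, const float3 &v) { return {s*v.x, s*v.y, s*v.z}; }
static inline float3 operator+(const float3 &a, const float3 &b) { return {a.x + b.x, a.y + b.y, a.z + b.z}; }

/* Barycentric blend of per-vertex normals, as in the code above;
 * no normalization here, matching the assignment to sd->N. */
static inline float3 interp_normal(const float3 n[3], float u, float v)
{
	const float w = 1.0f - u - v;
	return u*n[0] + v*n[1] + w*n[2];
}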
diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h
index 4f72c5b..9f0fe03 100644
--- a/intern/cycles/kernel/geom/geom_object.h
+++ b/intern/cycles/kernel/geom/geom_object.h
@@ -55,6 +55,21 @@ ccl_device_inline Transform object_fetch_transform(KernelGlobals *kg, int object
return tfm;
}
+/* Lamp to world space transformation */
+
+ccl_device_inline Transform lamp_fetch_transform(KernelGlobals *kg, int lamp, bool inverse)
+{
+ int offset = lamp*LIGHT_SIZE + (inverse? 8 : 5);
+
+ Transform tfm;
+ tfm.x = kernel_tex_fetch(__light_data, offset + 0);
+ tfm.y = kernel_tex_fetch(__light_data, offset + 1);
+ tfm.z = kernel_tex_fetch(__light_data, offset + 2);
+ tfm.w = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
+
+ return tfm;
+}
+
/* Object to world space transformation for motion vectors */
ccl_device_inline Transform object_fetch_vector_transform(KernelGlobals *kg, int object, enum ObjectVectorTransform type)
@@ -147,10 +162,14 @@ ccl_device_inline void object_inverse_position_transform(KernelGlobals *kg, cons
ccl_device_inline void object_inverse_normal_transform(KernelGlobals *kg, const ShaderData *sd, float3 *N)
{
#ifdef __OBJECT_MOTION__
- *N = normalize(transform_direction_transposed_auto(&ccl_fetch(sd, ob_tfm), *N));
+ if((ccl_fetch(sd, object) != OBJECT_NONE) || (ccl_fetch(sd, type) == PRIMITIVE_LAMP)) {
+ *N = normalize(transform_direction_transposed_auto(&ccl_fetch(sd, ob_tfm), *N));
+ }
#else
- Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM);
- *N = normalize(transform_direction_transposed(&tfm, *N));
+ if(ccl_fetch(sd, object) != OBJECT_NONE) {
+ Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM);
+ *N = normalize(transform_direction_transposed(&tfm, *N));
+ }
#endif
}
@@ -308,7 +327,7 @@ ccl_device_inline uint object_patch_map_offset(KernelGlobals *kg, int object)
ccl_device int shader_pass_id(KernelGlobals *kg, const ShaderData *sd)
{
- return kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2 + 1);
+ return kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*SHADER_SIZE + 1);
}
/* Particle data from which object was instanced */
@@ -376,15 +395,33 @@ ccl_device float3 particle_angular_velocity(KernelGlobals *kg, int particle)
ccl_device_inline float3 bvh_clamp_direction(float3 dir)
{
/* clamp absolute values by exp2f(-80.0f) to avoid division by zero when calculating inverse direction */
- float ooeps = 8.271806E-25f;
+#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
+ const ssef oopes(8.271806E-25f,8.271806E-25f,8.271806E-25f,0.0f);
+ const ssef mask = _mm_cmpgt_ps(fabs(dir), oopes);
+ const ssef signdir = signmsk(dir.m128) | oopes;
+# ifndef __KERNEL_AVX__
+ ssef res = mask & ssef(dir);
+ res = _mm_or_ps(res,_mm_andnot_ps(mask, signdir));
+# else
+ ssef res = _mm_blendv_ps(signdir, dir, mask);
+# endif
+ return float3(res);
+#else /* __KERNEL_SSE__ && __KERNEL_SSE2__ */
+ const float ooeps = 8.271806E-25f;
return make_float3((fabsf(dir.x) > ooeps)? dir.x: copysignf(ooeps, dir.x),
(fabsf(dir.y) > ooeps)? dir.y: copysignf(ooeps, dir.y),
(fabsf(dir.z) > ooeps)? dir.z: copysignf(ooeps, dir.z));
+#endif /* __KERNEL_SSE__ && __KERNEL_SSE2__ */
}
ccl_device_inline float3 bvh_inverse_direction(float3 dir)
{
+ /* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */
+#if defined(__KERNEL_SSE__) && 0
+ return rcp(dir);
+#else
return 1.0f / dir;
+#endif
}
/* Transform ray into object space to enter static object in BVH */
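bvh_clamp_direction() above gains an SSE path, but the rule is the same as the scalar fallback: any direction component whose magnitude is below 8.271806e-25 (about 2^-80) is replaced by that epsilon with the original sign, so the subsequent 1.0f/dir in bvh_inverse_direction() cannot divide by zero. A tiny standalone illustration of the scalar rule:

#include <math.h>
#include <stdio.h>

/* Clamp a tiny direction component before inverting, as in the scalar path. */
static float clamp_component(float d)
{
	const float ooeps = 8.271806E-25f;  /* roughly 2^-80 */
	return (fabsf(d) > ooeps) ? d : copysignf(ooeps, d);
}

int main(void)
{
	const float dir_y = 0.0f;
	printf("%g\n", 1.0f / clamp_component(dir_y));  /* large but finite, not inf */
	return 0;
}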
diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h
index dd53282..4db121d 100644
--- a/intern/cycles/kernel/geom/geom_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h
@@ -59,21 +59,33 @@ void triangle_intersect_precalc(float3 dir,
IsectPrecalc *isect_precalc)
{
/* Calculate dimension where the ray direction is maximal. */
+#ifndef __KERNEL_SSE__
int kz = util_max_axis(make_float3(fabsf(dir.x),
fabsf(dir.y),
fabsf(dir.z)));
int kx = kz + 1; if(kx == 3) kx = 0;
int ky = kx + 1; if(ky == 3) ky = 0;
+#else
+ int kx, ky, kz;
+ /* Avoiding mispredicted branch on direction. */
+ kz = util_max_axis(fabs(dir));
+ static const char inc_xaxis[] = {1, 2, 0, 55};
+ static const char inc_yaxis[] = {2, 0, 1, 55};
+ kx = inc_xaxis[kz];
+ ky = inc_yaxis[kz];
+#endif
+
+ float dir_kz = IDX(dir, kz);
/* Swap kx and ky dimensions to preserve winding direction of triangles. */
- if(IDX(dir, kz) < 0.0f) {
+ if(dir_kz < 0.0f) {
int tmp = kx;
kx = ky;
ky = tmp;
}
/* Calculate the shear constants. */
- float inv_dir_z = 1.0f / IDX(dir, kz);
+ float inv_dir_z = 1.0f / dir_kz;
isect_precalc->Sx = IDX(dir, kx) * inv_dir_z;
isect_precalc->Sy = IDX(dir, ky) * inv_dir_z;
isect_precalc->Sz = inv_dir_z;
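The new SSE path in triangle_intersect_precalc() replaces the scalar wrap-around chain (kx = kz + 1; if(kx == 3) kx = 0; and likewise for ky) with two tiny lookup tables indexed by the maximal axis kz, avoiding the data-dependent branches. A sketch of the same idea in isolation (the fourth entries are just padding, mirroring the diff):

/* Branch-free "next axis" selection for kz in {0, 1, 2}:
 * kx is the next axis and ky the one after that, cyclically. */
static const char inc_xaxis[] = {1, 2, 0, 55};  /* 55 = unused padding slot */
static const char inc_yaxis[] = {2, 0, 1, 55};

static inline void other_axes(int kz, int *kx, int *ky)
{
	*kx = inc_xaxis[kz];
	*ky = inc_yaxis[kz];
}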
@@ -96,7 +108,7 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
float3 P,
uint visibility,
int object,
- int triAddr)
+ int prim_addr)
{
const int kx = isect_precalc->kx;
const int ky = isect_precalc->ky;
@@ -106,7 +118,68 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
const float Sz = isect_precalc->Sz;
/* Calculate vertices relative to ray origin. */
- const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, triAddr);
+ const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
+
+#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
+ const avxf avxf_P(P.m128, P.m128);
+
+ const avxf tri_ab = kernel_tex_fetch_avxf(__prim_tri_verts, tri_vindex + 0);
+ const avxf tri_bc = kernel_tex_fetch_avxf(__prim_tri_verts, tri_vindex + 1);
+
+ const avxf AB = tri_ab - avxf_P;
+ const avxf BC = tri_bc - avxf_P;
+
+ const __m256i permute_mask = _mm256_set_epi32(0x3, kz, ky, kx, 0x3, kz, ky, kx);
+
+ const avxf AB_k = shuffle(AB, permute_mask);
+ const avxf BC_k = shuffle(BC, permute_mask);
+
+ /* Akz, Akz, Bkz, Bkz, Bkz, Bkz, Ckz, Ckz */
+ const avxf ABBC_kz = shuffle<2>(AB_k, BC_k);
+
+ /* Akx, Aky, Bkx, Bky, Bkx,Bky, Ckx, Cky */
+ const avxf ABBC_kxy = shuffle<0,1,0,1>(AB_k, BC_k);
+
+ const avxf Sxy(Sy, Sx, Sy, Sx);
+
+ /* Ax, Ay, Bx, By, Bx, By, Cx, Cy */
+ const avxf ABBC_xy = nmadd(ABBC_kz, Sxy, ABBC_kxy);
+
+ float ABBC_kz_array[8];
+ _mm256_storeu_ps((float*)&ABBC_kz_array, ABBC_kz);
+
+ const float A_kz = ABBC_kz_array[0];
+ const float B_kz = ABBC_kz_array[2];
+ const float C_kz = ABBC_kz_array[6];
+
+ /* By, Bx, Cy, Cx, By, Bx, Ay, Ax */
+ const avxf BCBA_yx = permute<3,2,7,6,3,2,1,0>(ABBC_xy);
+
+ const avxf neg_mask(0,0,0,0,0x80000000, 0x80000000, 0x80000000, 0x80000000);
+
+ /* W U V
+ * (AxBy-AyBx) (BxCy-ByCx) XX XX (BxBy-ByBx) (CxAy-CyAx) XX XX
+ */
+ const avxf WUxxxxVxx_neg = _mm256_hsub_ps(ABBC_xy * BCBA_yx, neg_mask /* Dont care */);
+
+ const avxf WUVWnegWUVW = permute<0,1,5,0,0,1,5,0>(WUxxxxVxx_neg) ^ neg_mask;
+
+ /* Calculate scaled barycentric coordinates. */
+ float WUVW_array[4];
+ _mm_storeu_ps((float*)&WUVW_array, _mm256_castps256_ps128 (WUVWnegWUVW));
+
+ const float W = WUVW_array[0];
+ const float U = WUVW_array[1];
+ const float V = WUVW_array[2];
+
+ const int WUVW_mask = 0x7 & _mm256_movemask_ps(WUVWnegWUVW);
+ const int WUVW_zero = 0x7 & _mm256_movemask_ps(_mm256_cmp_ps(WUVWnegWUVW,
+ _mm256_setzero_ps(), 0));
+
+ if(!((WUVW_mask == 7) || (WUVW_mask == 0)) && ((WUVW_mask | WUVW_zero) != 7)) {
+ return false;
+ }
+#else
const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0),
tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1),
tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2);
@@ -135,6 +208,7 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
{
return false;
}
+#endif
/* Calculate determinant. */
float det = U + V + W;
@@ -157,7 +231,7 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
#ifdef __VISIBILITY_FLAG__
/* visibility flag test. we do it here under the assumption
* that most triangles are culled by node flags */
- if(kernel_tex_fetch(__prim_visibility, triAddr) & visibility)
+ if(kernel_tex_fetch(__prim_visibility, prim_addr) & visibility)
#endif
{
#ifdef __KERNEL_CUDA__
@@ -167,7 +241,7 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
#endif
/* Normalize U, V, W, and T. */
const float inv_det = 1.0f / det;
- isect->prim = triAddr;
+ isect->prim = prim_addr;
isect->object = object;
isect->type = PRIMITIVE_TRIANGLE;
isect->u = U * inv_det;
@@ -190,7 +264,7 @@ ccl_device_inline void triangle_intersect_subsurface(
SubsurfaceIntersection *ss_isect,
float3 P,
int object,
- int triAddr,
+ int prim_addr,
float tmax,
uint *lcg_state,
int max_hits)
@@ -203,10 +277,71 @@ ccl_device_inline void triangle_intersect_subsurface(
const float Sz = isect_precalc->Sz;
/* Calculate vertices relative to ray origin. */
- const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, triAddr);
+ const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0),
tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1),
tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2);
+
+#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
+ const avxf avxf_P(P.m128, P.m128);
+
+ const avxf tri_ab = kernel_tex_fetch_avxf(__prim_tri_verts, tri_vindex + 0);
+ const avxf tri_bc = kernel_tex_fetch_avxf(__prim_tri_verts, tri_vindex + 1);
+
+ const avxf AB = tri_ab - avxf_P;
+ const avxf BC = tri_bc - avxf_P;
+
+ const __m256i permuteMask = _mm256_set_epi32(0x3, kz, ky, kx, 0x3, kz, ky, kx);
+
+ const avxf AB_k = shuffle(AB, permuteMask);
+ const avxf BC_k = shuffle(BC, permuteMask);
+
+ /* Akz, Akz, Bkz, Bkz, Bkz, Bkz, Ckz, Ckz */
+ const avxf ABBC_kz = shuffle<2>(AB_k, BC_k);
+
+	/* Akx, Aky, Bkx, Bky, Bkx, Bky, Ckx, Cky */
+ const avxf ABBC_kxy = shuffle<0,1,0,1>(AB_k, BC_k);
+
+ const avxf Sxy(Sy, Sx, Sy, Sx);
+
+ /* Ax, Ay, Bx, By, Bx, By, Cx, Cy */
+ const avxf ABBC_xy = nmadd(ABBC_kz, Sxy, ABBC_kxy);
+
+ float ABBC_kz_array[8];
+ _mm256_storeu_ps((float*)&ABBC_kz_array, ABBC_kz);
+
+ const float A_kz = ABBC_kz_array[0];
+ const float B_kz = ABBC_kz_array[2];
+ const float C_kz = ABBC_kz_array[6];
+
+ /* By, Bx, Cy, Cx, By, Bx, Ay, Ax */
+ const avxf BCBA_yx = permute<3,2,7,6,3,2,1,0>(ABBC_xy);
+
+ const avxf negMask(0,0,0,0,0x80000000, 0x80000000, 0x80000000, 0x80000000);
+
+ /* W U V
+ * (AxBy-AyBx) (BxCy-ByCx) XX XX (BxBy-ByBx) (CxAy-CyAx) XX XX
+ */
+	const avxf WUxxxxVxx_neg = _mm256_hsub_ps(ABBC_xy * BCBA_yx, negMask /* Don't care */);
+
+ const avxf WUVWnegWUVW = permute<0,1,5,0,0,1,5,0>(WUxxxxVxx_neg) ^ negMask;
+
+ /* Calculate scaled barycentric coordinates. */
+ float WUVW_array[4];
+ _mm_storeu_ps((float*)&WUVW_array, _mm256_castps256_ps128 (WUVWnegWUVW));
+
+ const float W = WUVW_array[0];
+ const float U = WUVW_array[1];
+ const float V = WUVW_array[2];
+
+ const int WUVW_mask = 0x7 & _mm256_movemask_ps(WUVWnegWUVW);
+ const int WUVW_zero = 0x7 & _mm256_movemask_ps(_mm256_cmp_ps(WUVWnegWUVW,
+ _mm256_setzero_ps(), 0));
+
+ if(!((WUVW_mask == 7) || (WUVW_mask == 0)) && ((WUVW_mask | WUVW_zero) != 7)) {
+ return;
+ }
+#else
const float3 A = make_float3(tri_a.x - P.x, tri_a.y - P.y, tri_a.z - P.z);
const float3 B = make_float3(tri_b.x - P.x, tri_b.y - P.y, tri_b.z - P.z);
const float3 C = make_float3(tri_c.x - P.x, tri_c.y - P.y, tri_c.z - P.z);
@@ -233,6 +368,7 @@ ccl_device_inline void triangle_intersect_subsurface(
{
return;
}
+#endif
/* Calculate determinant. */
float det = U + V + W;
@@ -279,7 +415,7 @@ ccl_device_inline void triangle_intersect_subsurface(
/* record intersection */
Intersection *isect = &ss_isect->hits[hit];
- isect->prim = triAddr;
+ isect->prim = prim_addr;
isect->object = object;
isect->type = PRIMITIVE_TRIANGLE;
isect->u = U * inv_det;
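For readers of the AVX2 blocks added above: they evaluate, for all three edges at once, the same scaled barycentric coordinates U, V, W that the scalar fall-back path computes one at a time. A rough scalar sketch of that computation (simplified; the helper name and array-style vertices are illustrative, not the exact fall-back code):

/* A, B, C are triangle vertices relative to the ray origin, already permuted
 * so the ray's dominant axis sits in the z component; Sx, Sy are the shear
 * constants from triangle_intersect_precalc. */
static bool scaled_barycentrics(const float A[3], const float B[3], const float C[3],
                                float Sx, float Sy,
                                float *U, float *V, float *W)
{
	const float Ax = A[0] - Sx * A[2], Ay = A[1] - Sy * A[2];
	const float Bx = B[0] - Sx * B[2], By = B[1] - Sy * B[2];
	const float Cx = C[0] - Sx * C[2], Cy = C[1] - Sy * C[2];

	/* 2D edge cross products; the AVX2 path forms the same three products
	 * with one multiply and a horizontal subtract. */
	*U = Cx * By - Cy * Bx;
	*V = Ax * Cy - Ay * Cx;
	*W = Bx * Ay - By * Ax;

	/* A hit is only possible if U, V and W agree in sign (zeros allowed),
	 * which is what the WUVW_mask / WUVW_zero test above checks. */
	return (*U >= 0.0f && *V >= 0.0f && *W >= 0.0f) ||
	       (*U <= 0.0f && *V <= 0.0f && *W <= 0.0f);
}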
diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h
index fd97a63..03724c9 100644
--- a/intern/cycles/kernel/geom/geom_volume.h
+++ b/intern/cycles/kernel/geom/geom_volume.h
@@ -29,7 +29,7 @@ CCL_NAMESPACE_BEGIN
/* Return position normalized to 0..1 in mesh bounds */
-#if defined(__KERNEL_GPU__) && __CUDA_ARCH__ < 300
+#if defined(__KERNEL_CUDA__) && __CUDA_ARCH__ < 300
ccl_device float4 volume_image_texture_3d(int id, float x, float y, float z)
{
float4 r;
@@ -42,7 +42,7 @@ ccl_device float4 volume_image_texture_3d(int id, float x, float y, float z)
}
return r;
}
-#endif /* __KERNEL_GPU__ */
+#endif /* __KERNEL_CUDA__ */
ccl_device_inline float3 volume_normalized_position(KernelGlobals *kg,
const ShaderData *sd,
@@ -64,8 +64,8 @@ ccl_device_inline float3 volume_normalized_position(KernelGlobals *kg,
ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy)
{
- float3 P = volume_normalized_position(kg, sd, sd->P);
-#ifdef __KERNEL_GPU__
+ float3 P = volume_normalized_position(kg, sd, ccl_fetch(sd, P));
+#ifdef __KERNEL_CUDA__
# if __CUDA_ARCH__ >= 300
CUtexObject tex = kernel_tex_fetch(__bindless_mapping, desc.offset);
float f = kernel_tex_image_interp_3d_float(tex, P.x, P.y, P.z);
@@ -73,6 +73,8 @@ ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd,
# else
float4 r = volume_image_texture_3d(desc.offset, P.x, P.y, P.z);
# endif
+#elif defined(__KERNEL_OPENCL__)
+ float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z);
#else
float4 r;
if(sd->flag & SD_VOLUME_CUBIC)
@@ -89,14 +91,16 @@ ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd,
ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy)
{
- float3 P = volume_normalized_position(kg, sd, sd->P);
-#ifdef __KERNEL_GPU__
+ float3 P = volume_normalized_position(kg, sd, ccl_fetch(sd, P));
+#ifdef __KERNEL_CUDA__
# if __CUDA_ARCH__ >= 300
CUtexObject tex = kernel_tex_fetch(__bindless_mapping, desc.offset);
float4 r = kernel_tex_image_interp_3d_float4(tex, P.x, P.y, P.z);
# else
float4 r = volume_image_texture_3d(desc.offset, P.x, P.y, P.z);
# endif
+#elif defined(__KERNEL_OPENCL__)
+ float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z);
#else
float4 r;
if(sd->flag & SD_VOLUME_CUBIC)
diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h
index 2a21524..6c3ee6b 100644
--- a/intern/cycles/kernel/kernel_accumulate.h
+++ b/intern/cycles/kernel/kernel_accumulate.h
@@ -48,10 +48,10 @@ ccl_device_inline void bsdf_eval_init(BsdfEval *eval, ClosureType type, float3 v
eval->scatter = value;
}
else
- eval->diffuse = value;
-#else
- eval->diffuse = value;
#endif
+ {
+ eval->diffuse = value;
+ }
}
ccl_device_inline void bsdf_eval_accum(BsdfEval *eval, ClosureType type, float3 value)
@@ -72,10 +72,10 @@ ccl_device_inline void bsdf_eval_accum(BsdfEval *eval, ClosureType type, float3
/* skipping transparent, this function is used for eval(), will be zero then */
}
else
- eval->diffuse += value;
-#else
- eval->diffuse += value;
#endif
+ {
+ eval->diffuse += value;
+ }
}
ccl_device_inline bool bsdf_eval_is_zero(BsdfEval *eval)
@@ -90,13 +90,32 @@ ccl_device_inline bool bsdf_eval_is_zero(BsdfEval *eval)
&& is_zero(eval->scatter);
}
else
+#endif
+ {
return is_zero(eval->diffuse);
-#else
- return is_zero(eval->diffuse);
+ }
+}
+
+ccl_device_inline void bsdf_eval_mul(BsdfEval *eval, float value)
+{
+#ifdef __PASSES__
+ if(eval->use_light_pass) {
+ eval->diffuse *= value;
+ eval->glossy *= value;
+ eval->transmission *= value;
+ eval->subsurface *= value;
+ eval->scatter *= value;
+
+		/* skipping transparent, this function is used for eval(), will be zero then */
+ }
+ else
#endif
+ {
+ eval->diffuse *= value;
+ }
}
-ccl_device_inline void bsdf_eval_mul(BsdfEval *eval, float3 value)
+ccl_device_inline void bsdf_eval_mul3(BsdfEval *eval, float3 value)
{
#ifdef __PASSES__
if(eval->use_light_pass) {
@@ -115,6 +134,17 @@ ccl_device_inline void bsdf_eval_mul(BsdfEval *eval, float3 value)
#endif
}
+ccl_device_inline float3 bsdf_eval_sum(BsdfEval *eval)
+{
+#ifdef __PASSES__
+ if(eval->use_light_pass) {
+ return eval->diffuse + eval->glossy + eval->transmission + eval->subsurface + eval->scatter;
+ }
+ else
+#endif
+ return eval->diffuse;
+}
+
/* Path Radiance
*
* We accumulate different render passes separately. After summing at the end
@@ -164,10 +194,10 @@ ccl_device_inline void path_radiance_init(PathRadiance *L, int use_light_pass)
L->mist = 0.0f;
}
else
- L->emission = make_float3(0.0f, 0.0f, 0.0f);
-#else
- L->emission = make_float3(0.0f, 0.0f, 0.0f);
#endif
+ {
+ L->emission = make_float3(0.0f, 0.0f, 0.0f);
+ }
}
ccl_device_inline void path_radiance_bsdf_bounce(PathRadiance *L, ccl_addr_space float3 *throughput,
@@ -193,16 +223,15 @@ ccl_device_inline void path_radiance_bsdf_bounce(PathRadiance *L, ccl_addr_space
}
else {
/* transparent bounce before first hit, or indirectly visible through BSDF */
- float3 sum = (bsdf_eval->diffuse + bsdf_eval->glossy + bsdf_eval->transmission + bsdf_eval->transparent +
- bsdf_eval->subsurface + bsdf_eval->scatter) * inverse_pdf;
+ float3 sum = (bsdf_eval_sum(bsdf_eval) + bsdf_eval->transparent) * inverse_pdf;
*throughput *= sum;
}
}
else
- *throughput *= bsdf_eval->diffuse*inverse_pdf;
-#else
- *throughput *= bsdf_eval->diffuse*inverse_pdf;
#endif
+ {
+ *throughput *= bsdf_eval->diffuse*inverse_pdf;
+ }
}
ccl_device_inline void path_radiance_accum_emission(PathRadiance *L, float3 throughput, float3 value, int bounce)
@@ -217,10 +246,10 @@ ccl_device_inline void path_radiance_accum_emission(PathRadiance *L, float3 thro
L->indirect += throughput*value;
}
else
- L->emission += throughput*value;
-#else
- L->emission += throughput*value;
#endif
+ {
+ L->emission += throughput*value;
+ }
}
ccl_device_inline void path_radiance_accum_ao(PathRadiance *L, float3 throughput, float3 alpha, float3 bsdf, float3 ao, int bounce)
@@ -238,10 +267,10 @@ ccl_device_inline void path_radiance_accum_ao(PathRadiance *L, float3 throughput
}
}
else
- L->emission += throughput*bsdf*ao;
-#else
- L->emission += throughput*bsdf*ao;
#endif
+ {
+ L->emission += throughput*bsdf*ao;
+ }
}
ccl_device_inline void path_radiance_accum_light(PathRadiance *L, float3 throughput, BsdfEval *bsdf_eval, float3 shadow, float shadow_fac, int bounce, bool is_lamp)
@@ -264,15 +293,14 @@ ccl_device_inline void path_radiance_accum_light(PathRadiance *L, float3 through
}
else {
/* indirectly visible lighting after BSDF bounce */
- float3 sum = bsdf_eval->diffuse + bsdf_eval->glossy + bsdf_eval->transmission + bsdf_eval->subsurface + bsdf_eval->scatter;
- L->indirect += throughput*sum*shadow;
+ L->indirect += throughput*bsdf_eval_sum(bsdf_eval)*shadow;
}
}
else
- L->emission += throughput*bsdf_eval->diffuse*shadow;
-#else
- L->emission += throughput*bsdf_eval->diffuse*shadow;
#endif
+ {
+ L->emission += throughput*bsdf_eval->diffuse*shadow;
+ }
}
ccl_device_inline void path_radiance_accum_background(PathRadiance *L, float3 throughput, float3 value, int bounce)
@@ -287,10 +315,10 @@ ccl_device_inline void path_radiance_accum_background(PathRadiance *L, float3 th
L->indirect += throughput*value;
}
else
- L->emission += throughput*value;
-#else
- L->emission += throughput*value;
#endif
+ {
+ L->emission += throughput*value;
+ }
}
ccl_device_inline void path_radiance_sum_indirect(PathRadiance *L)
@@ -433,10 +461,10 @@ ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg, PathRadi
/* No Light Passes */
else
- L_sum = L->emission;
-#else
- L_sum = L->emission;
#endif
+ {
+ L_sum = L->emission;
+ }
/* Reject invalid value */
float sum = fabsf((L_sum).x) + fabsf((L_sum).y) + fabsf((L_sum).z);
@@ -465,14 +493,12 @@ ccl_device_inline void path_radiance_accum_sample(PathRadiance *L, PathRadiance
L->indirect_subsurface += L_sample->indirect_subsurface*fac;
L->indirect_scatter += L_sample->indirect_scatter*fac;
- L->emission += L_sample->emission*fac;
L->background += L_sample->background*fac;
L->ao += L_sample->ao*fac;
L->shadow += L_sample->shadow*fac;
L->mist += L_sample->mist*fac;
-#else
- L->emission += L_sample->emission * fac;
#endif
+ L->emission += L_sample->emission * fac;
}
CCL_NAMESPACE_END
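The recurring edit in kernel_accumulate.h above folds the duplicated non-light-pass assignments into a single braced block that acts either as the else branch (when __PASSES__ is defined) or as a bare statement block (when it is not). A small sketch of the two preprocessed forms, using bsdf_eval_init as the example:

/* With __PASSES__ defined: */
if(eval->use_light_pass) {
	/* ... per-pass initialization ... */
}
else {
	eval->diffuse = value;
}

/* Without __PASSES__ the if/else disappears and only the block remains: */
{
	eval->diffuse = value;
}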
diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index fd9207f..c32ac6c 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -63,7 +63,7 @@ ccl_device_inline void compute_light_pass(KernelGlobals *kg,
/* sample ambient occlusion */
if(pass_filter & BAKE_FILTER_AO) {
- kernel_path_ao(kg, sd, &emission_sd, &L_sample, &state, &rng, throughput);
+ kernel_path_ao(kg, sd, &emission_sd, &L_sample, &state, &rng, throughput, shader_bsdf_alpha(kg, sd));
}
/* sample emission */
@@ -320,7 +320,8 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
P, Ng, Ng,
shader, object, prim,
u, v, 1.0f, 0.5f,
- !(kernel_tex_fetch(__object_flag, object) & SD_TRANSFORM_APPLIED));
+ !(kernel_tex_fetch(__object_flag, object) & SD_TRANSFORM_APPLIED),
+ LAMP_NONE);
sd.I = sd.N;
/* update differentials */
diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h
index 7b30df0..9d1f3bd 100644
--- a/intern/cycles/kernel/kernel_compat_cpu.h
+++ b/intern/cycles/kernel/kernel_compat_cpu.h
@@ -71,6 +71,20 @@ template<typename T> struct texture {
return data[index];
}
+#ifdef __KERNEL_AVX__
+	/* Reads a 256-bit (32-byte) value, but indexes in 128-bit (ssef-sized)
+	 * blocks to maintain compatibility with existing indices and data structures.
+ */
+ ccl_always_inline avxf fetch_avxf(const int index)
+ {
+ kernel_assert(index >= 0 && (index+1) < width);
+ ssef *ssefData = (ssef*)data;
+ ssef *ssefNodeData = &ssefData[index];
+ return _mm256_loadu_ps((float *)ssefNodeData);
+ }
+
+#endif
+
#ifdef __KERNEL_SSE2__
ccl_always_inline ssef fetch_ssef(int index)
{
@@ -506,6 +520,7 @@ typedef texture_image<half4> texture_image_half4;
/* Macros to handle different memory storage on different devices */
#define kernel_tex_fetch(tex, index) (kg->tex.fetch(index))
+#define kernel_tex_fetch_avxf(tex, index) (kg->tex.fetch_avxf(index))
#define kernel_tex_fetch_ssef(tex, index) (kg->tex.fetch_ssef(index))
#define kernel_tex_fetch_ssei(tex, index) (kg->tex.fetch_ssei(index))
#define kernel_tex_lookup(tex, t, offset, size) (kg->tex.lookup(t, offset, size))
diff --git a/intern/cycles/kernel/kernel_debug.h b/intern/cycles/kernel/kernel_debug.h
index 24d6458..5647bba 100644
--- a/intern/cycles/kernel/kernel_debug.h
+++ b/intern/cycles/kernel/kernel_debug.h
@@ -18,8 +18,9 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline void debug_data_init(DebugData *debug_data)
{
- debug_data->num_bvh_traversal_steps = 0;
+ debug_data->num_bvh_traversed_nodes = 0;
debug_data->num_bvh_traversed_instances = 0;
+ debug_data->num_bvh_intersections = 0;
debug_data->num_ray_bounces = 0;
}
@@ -30,16 +31,21 @@ ccl_device_inline void kernel_write_debug_passes(KernelGlobals *kg,
int sample)
{
int flag = kernel_data.film.pass_flag;
- if(flag & PASS_BVH_TRAVERSAL_STEPS) {
- kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversal_steps,
+ if(flag & PASS_BVH_TRAVERSED_NODES) {
+ kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_nodes,
sample,
- debug_data->num_bvh_traversal_steps);
+ debug_data->num_bvh_traversed_nodes);
}
if(flag & PASS_BVH_TRAVERSED_INSTANCES) {
kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_instances,
sample,
debug_data->num_bvh_traversed_instances);
}
+ if(flag & PASS_BVH_INTERSECTIONS) {
+ kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_intersections,
+ sample,
+ debug_data->num_bvh_intersections);
+ }
if(flag & PASS_RAY_BOUNCES) {
kernel_write_pass_float(buffer + kernel_data.film.pass_ray_bounces,
sample,
diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h
index 457887f..8c7c651 100644
--- a/intern/cycles/kernel/kernel_emission.h
+++ b/intern/cycles/kernel/kernel_emission.h
@@ -29,6 +29,8 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
/* setup shading at emitter */
float3 eval;
+ int shader_flag = kernel_tex_fetch(__shader_flag, (ls->shader & SHADER_MASK)*SHADER_SIZE);
+
#ifdef __BACKGROUND_MIS__
if(ls->type == LIGHT_BACKGROUND) {
Ray ray;
@@ -49,11 +51,21 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
}
else
#endif
+ if(shader_flag & SD_HAS_CONSTANT_EMISSION)
+ {
+ eval.x = __int_as_float(kernel_tex_fetch(__shader_flag, (ls->shader & SHADER_MASK)*SHADER_SIZE + 2));
+ eval.y = __int_as_float(kernel_tex_fetch(__shader_flag, (ls->shader & SHADER_MASK)*SHADER_SIZE + 3));
+ eval.z = __int_as_float(kernel_tex_fetch(__shader_flag, (ls->shader & SHADER_MASK)*SHADER_SIZE + 4));
+ if((ls->prim != PRIM_NONE) && dot(ls->Ng, I) < 0.0f) {
+ ls->Ng = -ls->Ng;
+ }
+ }
+ else
{
shader_setup_from_sample(kg, emission_sd,
ls->P, ls->Ng, I,
ls->shader, ls->object, ls->prim,
- ls->u, ls->v, t, time, false);
+ ls->u, ls->v, t, time, false, ls->lamp);
ls->Ng = ccl_fetch(emission_sd, Ng);
@@ -82,7 +94,8 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg,
ccl_addr_space PathState *state,
Ray *ray,
BsdfEval *eval,
- bool *is_lamp)
+ bool *is_lamp,
+ float rand_terminate)
{
if(ls->pdf == 0.0f)
return false;
@@ -122,7 +135,7 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg,
shader_bsdf_eval(kg, sd, ls->D, eval, ls->pdf, ls->shader & SHADER_USE_MIS);
#endif
- bsdf_eval_mul(eval, light_eval/ls->pdf);
+ bsdf_eval_mul3(eval, light_eval/ls->pdf);
#ifdef __PASSES__
/* use visibility flag to skip lights */
@@ -143,6 +156,16 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg,
if(bsdf_eval_is_zero(eval))
return false;
+ if(kernel_data.integrator.light_inv_rr_threshold > 0.0f) {
+ float probability = max3(bsdf_eval_sum(eval)) * kernel_data.integrator.light_inv_rr_threshold;
+ if(probability < 1.0f) {
+ if(rand_terminate >= probability) {
+ return false;
+ }
+ bsdf_eval_mul(eval, 1.0f / probability);
+ }
+ }
+
if(ls->shader & SHADER_CAST_SHADOW) {
/* setup ray */
bool transmit = (dot(ccl_fetch(sd, Ng), ls->D) < 0.0f);
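The new light_inv_rr_threshold test in direct_emission above is a Russian-roulette step: weak light contributions are stochastically dropped and the survivors are reweighted by 1/probability so the estimator stays unbiased. A minimal standalone sketch of the same idea on a scalar contribution (names hypothetical):

/* Keep a contribution with probability p = value * threshold (clamped to 1)
 * and divide survivors by p; E[result] = p*(value/p) + (1-p)*0 = value. */
float roulette_contribution(float value, float threshold, float rand01)
{
	float p = value * threshold;
	if(p < 1.0f) {
		if(rand01 >= p)
			return 0.0f;       /* terminated */
		return value / p;      /* survivor, reweighted */
	}
	return value;              /* strong contributions are always kept */
}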
diff --git a/intern/cycles/kernel/kernel_image_opencl.h b/intern/cycles/kernel/kernel_image_opencl.h
new file mode 100644
index 0000000..0352c58
--- /dev/null
+++ b/intern/cycles/kernel/kernel_image_opencl.h
@@ -0,0 +1,231 @@
+/*
+ * Copyright 2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/* For OpenCL all images are packed in a single array, and we do manual lookup
+ * and interpolation. */
+
+ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int id, int offset)
+{
+ /* Float4 */
+ if(id < TEX_START_BYTE4_OPENCL) {
+ return kernel_tex_fetch(__tex_image_float4_packed, offset);
+ }
+ /* Byte4 */
+ else if(id < TEX_START_FLOAT_OPENCL) {
+ uchar4 r = kernel_tex_fetch(__tex_image_byte4_packed, offset);
+ float f = 1.0f/255.0f;
+ return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
+ }
+ /* Float */
+ else if(id < TEX_START_BYTE_OPENCL) {
+ float f = kernel_tex_fetch(__tex_image_float_packed, offset);
+ return make_float4(f, f, f, 1.0f);
+ }
+ /* Byte */
+ else {
+ uchar r = kernel_tex_fetch(__tex_image_byte_packed, offset);
+ float f = r * (1.0f/255.0f);
+ return make_float4(f, f, f, 1.0f);
+ }
+}
+
+ccl_device_inline int svm_image_texture_wrap_periodic(int x, int width)
+{
+ x %= width;
+ if(x < 0)
+ x += width;
+ return x;
+}
+
+ccl_device_inline int svm_image_texture_wrap_clamp(int x, int width)
+{
+ return clamp(x, 0, width-1);
+}
+
+ccl_device_inline float svm_image_texture_frac(float x, int *ix)
+{
+ int i = float_to_int(x) - ((x < 0.0f)? 1: 0);
+ *ix = i;
+ return x - (float)i;
+}
+
+ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
+{
+ uint4 info = kernel_tex_fetch(__tex_image_packed_info, id*2);
+ uint width = info.x;
+ uint height = info.y;
+ uint offset = info.z;
+
+ /* Image Options */
+ uint interpolation = (info.w & (1 << 0)) ? INTERPOLATION_CLOSEST : INTERPOLATION_LINEAR;
+ uint extension;
+ if(info.w & (1 << 1))
+ extension = EXTENSION_REPEAT;
+ else if(info.w & (1 << 2))
+ extension = EXTENSION_EXTEND;
+ else
+ extension = EXTENSION_CLIP;
+
+ float4 r;
+ int ix, iy, nix, niy;
+ if(interpolation == INTERPOLATION_CLOSEST) {
+ svm_image_texture_frac(x*width, &ix);
+ svm_image_texture_frac(y*height, &iy);
+
+ if(extension == EXTENSION_REPEAT) {
+ ix = svm_image_texture_wrap_periodic(ix, width);
+ iy = svm_image_texture_wrap_periodic(iy, height);
+ }
+ else {
+ if(extension == EXTENSION_CLIP) {
+ if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ }
+ /* Fall through. */
+ /* EXTENSION_EXTEND */
+ ix = svm_image_texture_wrap_clamp(ix, width);
+ iy = svm_image_texture_wrap_clamp(iy, height);
+ }
+
+ r = svm_image_texture_read(kg, id, offset + ix + iy*width);
+ }
+ else { /* INTERPOLATION_LINEAR */
+ float tx = svm_image_texture_frac(x*width - 0.5f, &ix);
+ float ty = svm_image_texture_frac(y*height - 0.5f, &iy);
+
+ if(extension == EXTENSION_REPEAT) {
+ ix = svm_image_texture_wrap_periodic(ix, width);
+ iy = svm_image_texture_wrap_periodic(iy, height);
+
+ nix = svm_image_texture_wrap_periodic(ix+1, width);
+ niy = svm_image_texture_wrap_periodic(iy+1, height);
+ }
+ else {
+ if(extension == EXTENSION_CLIP) {
+ if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ }
+ nix = svm_image_texture_wrap_clamp(ix+1, width);
+ niy = svm_image_texture_wrap_clamp(iy+1, height);
+ ix = svm_image_texture_wrap_clamp(ix, width);
+ iy = svm_image_texture_wrap_clamp(iy, height);
+ }
+
+ r = (1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + iy*width);
+ r += (1.0f - ty)*tx*svm_image_texture_read(kg, id, offset + nix + iy*width);
+ r += ty*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + niy*width);
+ r += ty*tx*svm_image_texture_read(kg, id, offset + nix + niy*width);
+ }
+
+ return r;
+}
+
+
+ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z)
+{
+ uint4 info = kernel_tex_fetch(__tex_image_packed_info, id*2);
+ uint width = info.x;
+ uint height = info.y;
+ uint offset = info.z;
+ uint depth = kernel_tex_fetch(__tex_image_packed_info, id*2+1).x;
+
+ /* Image Options */
+ uint interpolation = (info.w & (1 << 0)) ? INTERPOLATION_CLOSEST : INTERPOLATION_LINEAR;
+ uint extension;
+ if(info.w & (1 << 1))
+ extension = EXTENSION_REPEAT;
+ else if(info.w & (1 << 2))
+ extension = EXTENSION_EXTEND;
+ else
+ extension = EXTENSION_CLIP;
+
+ float4 r;
+ int ix, iy, iz, nix, niy, niz;
+ if(interpolation == INTERPOLATION_CLOSEST) {
+ svm_image_texture_frac(x*width, &ix);
+ svm_image_texture_frac(y*height, &iy);
+ svm_image_texture_frac(z*depth, &iz);
+
+ if(extension == EXTENSION_REPEAT) {
+ ix = svm_image_texture_wrap_periodic(ix, width);
+ iy = svm_image_texture_wrap_periodic(iy, height);
+ iz = svm_image_texture_wrap_periodic(iz, depth);
+ }
+ else {
+ if(extension == EXTENSION_CLIP) {
+ if(x < 0.0f || y < 0.0f || z < 0.0f ||
+ x > 1.0f || y > 1.0f || z > 1.0f)
+ {
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ }
+ /* Fall through. */
+ /* EXTENSION_EXTEND */
+ ix = svm_image_texture_wrap_clamp(ix, width);
+ iy = svm_image_texture_wrap_clamp(iy, height);
+ iz = svm_image_texture_wrap_clamp(iz, depth);
+ }
+ r = svm_image_texture_read(kg, id, offset + ix + iy*width + iz*width*height);
+ }
+ else { /* INTERPOLATION_LINEAR */
+ float tx = svm_image_texture_frac(x*(float)width - 0.5f, &ix);
+ float ty = svm_image_texture_frac(y*(float)height - 0.5f, &iy);
+ float tz = svm_image_texture_frac(z*(float)depth - 0.5f, &iz);
+
+ if(extension == EXTENSION_REPEAT) {
+ ix = svm_image_texture_wrap_periodic(ix, width);
+ iy = svm_image_texture_wrap_periodic(iy, height);
+ iz = svm_image_texture_wrap_periodic(iz, depth);
+
+ nix = svm_image_texture_wrap_periodic(ix+1, width);
+ niy = svm_image_texture_wrap_periodic(iy+1, height);
+ niz = svm_image_texture_wrap_periodic(iz+1, depth);
+ }
+ else {
+ if(extension == EXTENSION_CLIP)
+ if(x < 0.0f || y < 0.0f || z < 0.0f ||
+ x > 1.0f || y > 1.0f || z > 1.0f)
+ {
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ /* Fall through. */
+ /* EXTENSION_EXTEND */
+ nix = svm_image_texture_wrap_clamp(ix+1, width);
+ niy = svm_image_texture_wrap_clamp(iy+1, height);
+ niz = svm_image_texture_wrap_clamp(iz+1, depth);
+
+ ix = svm_image_texture_wrap_clamp(ix, width);
+ iy = svm_image_texture_wrap_clamp(iy, height);
+ iz = svm_image_texture_wrap_clamp(iz, depth);
+ }
+
+ r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + iy*width + iz*width*height);
+ r += (1.0f - tz)*(1.0f - ty)*tx*svm_image_texture_read(kg, id, offset + nix + iy*width + iz*width*height);
+ r += (1.0f - tz)*ty*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + niy*width + iz*width*height);
+ r += (1.0f - tz)*ty*tx*svm_image_texture_read(kg, id, offset + nix + niy*width + iz*width*height);
+
+ r += tz*(1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + iy*width + niz*width*height);
+ r += tz*(1.0f - ty)*tx*svm_image_texture_read(kg, id, offset + nix + iy*width + niz*width*height);
+ r += tz*ty*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + niy*width + niz*width*height);
+ r += tz*ty*tx*svm_image_texture_read(kg, id, offset + nix + niy*width + niz*width*height);
+
+ }
+
+ return r;
+}
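The INTERPOLATION_LINEAR branches in the new file above are plain bilinear (2D) and trilinear (3D) filtering: the fractional offsets returned by svm_image_texture_frac weight the corner texels, and the weights always sum to one. A small sketch of the 2D weights used above:

/* Bilinear weights for fractional offsets tx, ty in [0, 1). */
float w00 = (1.0f - ty) * (1.0f - tx);   /* (ix,  iy)  */
float w10 = (1.0f - ty) * tx;            /* (nix, iy)  */
float w01 = ty * (1.0f - tx);            /* (ix,  niy) */
float w11 = ty * tx;                     /* (nix, niy) */
/* w00 + w10 + w01 + w11 == 1, so constant images are reproduced exactly. */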
diff --git a/intern/cycles/kernel/kernel_jitter.h b/intern/cycles/kernel/kernel_jitter.h
index aec7bc3..6754613 100644
--- a/intern/cycles/kernel/kernel_jitter.h
+++ b/intern/cycles/kernel/kernel_jitter.h
@@ -149,6 +149,15 @@ ccl_device_inline uint cmj_hash(uint i, uint p)
return i;
}
+ccl_device_inline uint cmj_hash_simple(uint i, uint p)
+{
+ i = (i ^ 61) ^ p;
+ i += i << 3;
+ i ^= i >> 4;
+ i *= 0x27d4eb2d;
+ return i;
+}
+
ccl_device_inline float cmj_randfloat(uint i, uint p)
{
return cmj_hash(i, p) * (1.0f / 4294967808.0f);
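A side note on the cmj_randfloat context line above: the divisor 4294967808 is deliberately a little larger than 2^32 = 4294967296, so that even the maximum 32-bit hash rounds to a float strictly below 1.0f. A tiny sketch of that mapping (hypothetical helper name):

/* Map a 32-bit hash to [0, 1); the oversized divisor keeps the result below
 * 1.0f even for h == 0xffffffff once rounded to single precision. */
float hash_to_float01(unsigned int h)
{
	return h * (1.0f / 4294967808.0f);
}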
diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h
index 1e99f2c..d4cc36d 100644
--- a/intern/cycles/kernel/kernel_light.h
+++ b/intern/cycles/kernel/kernel_light.h
@@ -297,7 +297,7 @@ ccl_device_inline float background_portal_pdf(KernelGlobals *kg,
float3 axisu = make_float3(data1.y, data1.z, data1.w);
float3 axisv = make_float3(data2.y, data2.z, data2.w);
- if(!ray_quad_intersect(P, direction, 1e-4f, FLT_MAX, lightpos, axisu, axisv, dir, NULL, NULL))
+ if(!ray_quad_intersect(P, direction, 1e-4f, FLT_MAX, lightpos, axisu, axisv, dir, NULL, NULL, NULL, NULL))
continue;
portal_pdf += area_light_sample(P, &lightpos, axisu, axisv, 0.0f, 0.0f, false);
@@ -510,7 +510,7 @@ ccl_device float lamp_light_pdf(KernelGlobals *kg, const float3 Ng, const float3
return t*t/cos_pi;
}
-ccl_device_inline void lamp_light_sample(KernelGlobals *kg,
+ccl_device_inline bool lamp_light_sample(KernelGlobals *kg,
int lamp,
float randu, float randv,
float3 P,
@@ -581,7 +581,14 @@ ccl_device_inline void lamp_light_sample(KernelGlobals *kg,
/* spot light attenuation */
float4 data2 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 2);
ls->eval_fac *= spot_light_attenuation(data1, data2, ls);
+ if(ls->eval_fac == 0.0f) {
+ return false;
+ }
}
+ float2 uv = map_to_sphere(ls->Ng);
+ ls->u = uv.x;
+ ls->v = uv.y;
+
ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t);
}
else {
@@ -593,23 +600,31 @@ ccl_device_inline void lamp_light_sample(KernelGlobals *kg,
float3 axisv = make_float3(data2.y, data2.z, data2.w);
float3 D = make_float3(data3.y, data3.z, data3.w);
+ if(dot(ls->P - P, D) > 0.0f) {
+ return false;
+ }
+
+ float3 inplane = ls->P;
ls->pdf = area_light_sample(P, &ls->P,
axisu, axisv,
randu, randv,
true);
+ inplane = ls->P - inplane;
+ ls->u = dot(inplane, axisu) * (1.0f / dot(axisu, axisu)) + 0.5f;
+ ls->v = dot(inplane, axisv) * (1.0f / dot(axisv, axisv)) + 0.5f;
+
ls->Ng = D;
ls->D = normalize_len(ls->P - P, &ls->t);
float invarea = data2.x;
ls->eval_fac = 0.25f*invarea;
-
- if(dot(ls->D, D) > 0.0f)
- ls->pdf = 0.0f;
}
ls->eval_fac *= kernel_data.integrator.inv_pdf_lights;
}
+
+ return (ls->pdf > 0.0f);
}
ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D, float t, LightSample *ls)
@@ -700,6 +715,9 @@ ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D,
if(ls->eval_fac == 0.0f)
return false;
}
+ float2 uv = map_to_sphere(ls->Ng);
+ ls->u = uv.x;
+ ls->v = uv.y;
/* compute pdf */
if(ls->t != FLT_MAX)
@@ -724,8 +742,10 @@ ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D,
float3 light_P = make_float3(data0.y, data0.z, data0.w);
- if(!ray_quad_intersect(P, D, 0.0f, t,
- light_P, axisu, axisv, Ng, &ls->P, &ls->t))
+ if(!ray_quad_intersect(P, D, 0.0f, t, light_P,
+ axisu, axisv, Ng,
+ &ls->P, &ls->t,
+ &ls->u, &ls->v))
{
return false;
}
@@ -836,7 +856,7 @@ ccl_device bool light_select_reached_max_bounces(KernelGlobals *kg, int index, i
return (bounce > __float_as_int(data4.x));
}
-ccl_device_noinline void light_sample(KernelGlobals *kg,
+ccl_device_noinline bool light_sample(KernelGlobals *kg,
float randt,
float randu,
float randv,
@@ -857,21 +877,20 @@ ccl_device_noinline void light_sample(KernelGlobals *kg,
int shader_flag = __float_as_int(l.z);
triangle_light_sample(kg, prim, object, randu, randv, time, ls);
-
/* compute incoming direction, distance and pdf */
ls->D = normalize_len(ls->P - P, &ls->t);
ls->pdf = triangle_light_pdf(kg, ls->Ng, -ls->D, ls->t);
ls->shader |= shader_flag;
+ return (ls->pdf > 0.0f);
}
else {
int lamp = -prim-1;
if(UNLIKELY(light_select_reached_max_bounces(kg, lamp, bounce))) {
- ls->pdf = 0.0f;
- return;
+ return false;
}
- lamp_light_sample(kg, lamp, randu, randv, P, ls);
+ return lamp_light_sample(kg, lamp, randu, randv, P, ls);
}
}
@@ -882,4 +901,3 @@ ccl_device int light_select_num_samples(KernelGlobals *kg, int index)
}
CCL_NAMESPACE_END
-
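The new ls->u / ls->v computation for area lights above projects the in-plane offset of the sampled point from the light center onto the light's two axes and recenters it, so the center maps to (0.5, 0.5) and the edges to 0 and 1. Taken in isolation the mapping is:

/* inplane = sampled point minus the light center; axisu / axisv span the
 * rectangle, so dividing by their squared lengths normalizes the offset. */
float u = dot(inplane, axisu) * (1.0f / dot(axisu, axisu)) + 0.5f;
float v = dot(inplane, axisv) * (1.0f / dot(axisv, axisv)) + 0.5f;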
diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h
index 20cf3fa..7aec47e 100644
--- a/intern/cycles/kernel/kernel_passes.h
+++ b/intern/cycles/kernel/kernel_passes.h
@@ -20,7 +20,7 @@ ccl_device_inline void kernel_write_pass_float(ccl_global float *buffer, int sam
{
ccl_global float *buf = buffer;
#if defined(__SPLIT_KERNEL__) && defined(__WORK_STEALING__)
- atomic_add_float(buf, value);
+ atomic_add_and_fetch_float(buf, value);
#else
*buf = (sample == 0)? value: *buf + value;
#endif // __SPLIT_KERNEL__ && __WORK_STEALING__
@@ -33,9 +33,9 @@ ccl_device_inline void kernel_write_pass_float3(ccl_global float *buffer, int sa
ccl_global float *buf_y = buffer + 1;
ccl_global float *buf_z = buffer + 2;
- atomic_add_float(buf_x, value.x);
- atomic_add_float(buf_y, value.y);
- atomic_add_float(buf_z, value.z);
+ atomic_add_and_fetch_float(buf_x, value.x);
+ atomic_add_and_fetch_float(buf_y, value.y);
+ atomic_add_and_fetch_float(buf_z, value.z);
#else
ccl_global float3 *buf = (ccl_global float3*)buffer;
*buf = (sample == 0)? value: *buf + value;
@@ -50,10 +50,10 @@ ccl_device_inline void kernel_write_pass_float4(ccl_global float *buffer, int sa
ccl_global float *buf_z = buffer + 2;
ccl_global float *buf_w = buffer + 3;
- atomic_add_float(buf_x, value.x);
- atomic_add_float(buf_y, value.y);
- atomic_add_float(buf_z, value.z);
- atomic_add_float(buf_w, value.w);
+ atomic_add_and_fetch_float(buf_x, value.x);
+ atomic_add_and_fetch_float(buf_y, value.y);
+ atomic_add_and_fetch_float(buf_z, value.z);
+ atomic_add_and_fetch_float(buf_w, value.w);
#else
ccl_global float4 *buf = (ccl_global float4*)buffer;
*buf = (sample == 0)? value: *buf + value;
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index 7558fb9..e25f259 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -53,6 +53,47 @@
CCL_NAMESPACE_BEGIN
+ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
+ ShaderData *sd,
+ ShaderData *emission_sd,
+ PathRadiance *L,
+ PathState *state,
+ RNG *rng,
+ float3 throughput,
+ float3 ao_alpha)
+{
+ /* todo: solve correlation */
+ float bsdf_u, bsdf_v;
+
+ path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
+
+ float ao_factor = kernel_data.background.ao_factor;
+ float3 ao_N;
+ float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
+ float3 ao_D;
+ float ao_pdf;
+
+ sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
+
+ if(dot(ccl_fetch(sd, Ng), ao_D) > 0.0f && ao_pdf != 0.0f) {
+ Ray light_ray;
+ float3 ao_shadow;
+
+ light_ray.P = ray_offset(ccl_fetch(sd, P), ccl_fetch(sd, Ng));
+ light_ray.D = ao_D;
+ light_ray.t = kernel_data.background.ao_distance;
+#ifdef __OBJECT_MOTION__
+ light_ray.time = ccl_fetch(sd, time);
+#endif /* __OBJECT_MOTION__ */
+ light_ray.dP = ccl_fetch(sd, dP);
+ light_ray.dD = differential3_zero();
+
+ if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow)) {
+ path_radiance_accum_ao(L, throughput, ao_alpha, ao_bsdf, ao_shadow, state->bounce);
+ }
+ }
+}
+
ccl_device void kernel_path_indirect(KernelGlobals *kg,
ShaderData *sd,
ShaderData *emission_sd,
@@ -97,9 +138,13 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
state->bounce);
}
}
-#endif
+#endif /* __LAMP_MIS__ */
#ifdef __VOLUME__
+ /* Sanitize volume stack. */
+ if(!hit) {
+ kernel_volume_clean_stack(kg, state->volume_stack);
+ }
/* volume attenuation, emission, scatter */
if(state->volume_stack[0].shader != SHADER_NONE) {
Ray volume_ray = *ray;
@@ -198,7 +243,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
}
}
else
-# endif
+# endif /* __VOLUME_DECOUPLED__ */
{
/* integrate along volume segment with distance sampling */
VolumeIntegrateResult result = kernel_volume_integrate(
@@ -230,10 +275,10 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
break;
}
}
-# endif
+# endif /* __VOLUME_SCATTER__ */
}
}
-#endif
+#endif /* __VOLUME__ */
if(!hit) {
#ifdef __BACKGROUND__
@@ -243,7 +288,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
throughput,
L_background,
state->bounce);
-#endif
+#endif /* __BACKGROUND__ */
break;
}
@@ -257,7 +302,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
shader_eval_surface(kg, sd, rng, state, rbsdf, state->flag, SHADER_CONTEXT_INDIRECT);
#ifdef __BRANCHED_PATH__
shader_merge_closures(sd);
-#endif
+#endif /* __BRANCHED_PATH__ */
/* blurring of bsdf after bounces, for rays that have a small likelihood
* of following this particular path (diffuse, rough glossy) */
@@ -280,7 +325,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
state->ray_pdf);
path_radiance_accum_emission(L, throughput, emission, state->bounce);
}
-#endif
+#endif /* __EMISSION__ */
/* path termination. this is a strange place to put the termination, it's
* mainly due to the mixed in MIS that we use. gives too many unneeded
@@ -305,42 +350,9 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
#ifdef __AO__
/* ambient occlusion */
if(kernel_data.integrator.use_ambient_occlusion || (sd->flag & SD_AO)) {
- float bsdf_u, bsdf_v;
- path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
-
- float ao_factor = kernel_data.background.ao_factor;
- float3 ao_N;
- float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
- float3 ao_D;
- float ao_pdf;
- float3 ao_alpha = make_float3(0.0f, 0.0f, 0.0f);
-
- sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
-
- if(dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
- Ray light_ray;
- float3 ao_shadow;
-
- light_ray.P = ray_offset(sd->P, sd->Ng);
- light_ray.D = ao_D;
- light_ray.t = kernel_data.background.ao_distance;
-# ifdef __OBJECT_MOTION__
- light_ray.time = sd->time;
-# endif
- light_ray.dP = sd->dP;
- light_ray.dD = differential3_zero();
-
- if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow)) {
- path_radiance_accum_ao(L,
- throughput,
- ao_alpha,
- ao_bsdf,
- ao_shadow,
- state->bounce);
- }
- }
+ kernel_path_ao(kg, sd, emission_sd, L, state, rng, throughput, make_float3(0.0f, 0.0f, 0.0f));
}
-#endif
+#endif /* __AO__ */
#ifdef __SUBSURFACE__
/* bssrdf scatter to a different location on the same object, replacing
@@ -372,7 +384,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
false);
}
}
-#endif
+#endif /* __SUBSURFACE__ */
#if defined(__EMISSION__) && defined(__BRANCHED_PATH__)
if(kernel_data.integrator.use_direct_light) {
@@ -387,53 +399,13 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
L,
all);
}
-#endif
+#endif /* defined(__EMISSION__) && defined(__BRANCHED_PATH__) */
if(!kernel_path_surface_bounce(kg, rng, sd, &throughput, state, L, ray))
break;
}
}
-ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
- ShaderData *sd,
- ShaderData *emission_sd,
- PathRadiance *L,
- PathState *state,
- RNG *rng,
- float3 throughput)
-{
- /* todo: solve correlation */
- float bsdf_u, bsdf_v;
-
- path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
-
- float ao_factor = kernel_data.background.ao_factor;
- float3 ao_N;
- float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
- float3 ao_D;
- float ao_pdf;
- float3 ao_alpha = shader_bsdf_alpha(kg, sd);
-
- sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
-
- if(dot(ccl_fetch(sd, Ng), ao_D) > 0.0f && ao_pdf != 0.0f) {
- Ray light_ray;
- float3 ao_shadow;
-
- light_ray.P = ray_offset(ccl_fetch(sd, P), ccl_fetch(sd, Ng));
- light_ray.D = ao_D;
- light_ray.t = kernel_data.background.ao_distance;
-#ifdef __OBJECT_MOTION__
- light_ray.time = ccl_fetch(sd, time);
-#endif
- light_ray.dP = ccl_fetch(sd, dP);
- light_ray.dD = differential3_zero();
-
- if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow))
- path_radiance_accum_ao(L, throughput, ao_alpha, ao_bsdf, ao_shadow, state->bounce);
- }
-}
-
#ifdef __SUBSURFACE__
# ifndef __KERNEL_CUDA__
ccl_device
@@ -481,7 +453,7 @@ bool kernel_path_subsurface_scatter(
ss_indirect->need_update_volume_stack =
kernel_data.integrator.use_volumes &&
ccl_fetch(sd, flag) & SD_OBJECT_INTERSECTS_VOLUME;
-# endif
+# endif /* __VOLUME__ */
/* compute lighting with the BSDF closure */
for(int hit = 0; hit < num_hits; hit++) {
@@ -524,7 +496,7 @@ bool kernel_path_subsurface_scatter(
{
# ifdef __LAMP_MIS__
hit_state->ray_t = 0.0f;
-# endif
+# endif /* __LAMP_MIS__ */
# ifdef __VOLUME__
if(ss_indirect->need_update_volume_stack) {
@@ -539,7 +511,7 @@ bool kernel_path_subsurface_scatter(
&volume_ray,
hit_state->volume_stack);
}
-# endif
+# endif /* __VOLUME__ */
path_radiance_reset_indirect(L);
ss_indirect->num_rays++;
}
@@ -625,14 +597,14 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
#ifdef __KERNEL_DEBUG__
DebugData debug_data;
debug_data_init(&debug_data);
-#endif
+#endif /* __KERNEL_DEBUG__ */
#ifdef __SUBSURFACE__
SubsurfaceIndirectRays ss_indirect;
kernel_path_subsurface_init_indirect(&ss_indirect);
for(;;) {
-#endif
+#endif /* __SUBSURFACE__ */
/* path iteration */
for(;;) {
@@ -658,15 +630,16 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
bool hit = scene_intersect(kg, ray, visibility, &isect, &lcg_state, difl, extmax);
#else
bool hit = scene_intersect(kg, ray, visibility, &isect, NULL, 0.0f, 0.0f);
-#endif
+#endif /* __HAIR__ */
#ifdef __KERNEL_DEBUG__
if(state.flag & PATH_RAY_CAMERA) {
- debug_data.num_bvh_traversal_steps += isect.num_traversal_steps;
+ debug_data.num_bvh_traversed_nodes += isect.num_traversed_nodes;
debug_data.num_bvh_traversed_instances += isect.num_traversed_instances;
+ debug_data.num_bvh_intersections += isect.num_intersections;
}
debug_data.num_ray_bounces++;
-#endif
+#endif /* __KERNEL_DEBUG__ */
#ifdef __LAMP_MIS__
if(kernel_data.integrator.use_lamp_mis && !(state.flag & PATH_RAY_CAMERA)) {
@@ -687,9 +660,13 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
if(indirect_lamp_emission(kg, &emission_sd, &state, &light_ray, &emission))
path_radiance_accum_emission(&L, throughput, emission, state.bounce);
}
-#endif
+#endif /* __LAMP_MIS__ */
#ifdef __VOLUME__
+ /* Sanitize volume stack. */
+ if(!hit) {
+ kernel_volume_clean_stack(kg, state.volume_stack);
+ }
/* volume attenuation, emission, scatter */
if(state.volume_stack[0].shader != SHADER_NONE) {
Ray volume_ray = ray;
@@ -751,7 +728,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
}
}
else
-# endif
+# endif /* __VOLUME_DECOUPLED__ */
{
/* integrate along volume segment with distance sampling */
VolumeIntegrateResult result = kernel_volume_integrate(
@@ -768,10 +745,10 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
else
break;
}
-# endif
+# endif /* __VOLUME_SCATTER__ */
}
}
-#endif
+#endif /* __VOLUME__ */
if(!hit) {
/* eval background shader if nothing hit */
@@ -780,7 +757,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
#ifdef __PASSES__
if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
-#endif
+#endif /* __PASSES__ */
break;
}
@@ -788,7 +765,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
/* sample background shader */
float3 L_background = indirect_background(kg, &emission_sd, &state, &ray);
path_radiance_accum_background(&L, throughput, L_background, state.bounce);
-#endif
+#endif /* __BACKGROUND__ */
break;
}
@@ -816,7 +793,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
if(sd.flag & SD_HOLDOUT_MASK)
break;
}
-#endif
+#endif /* __HOLDOUT__ */
/* holdout mask objects do not write data passes */
kernel_write_data_passes(kg, buffer, &L, &sd, sample, &state, throughput);
@@ -839,7 +816,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
float3 emission = indirect_primitive_emission(kg, &sd, isect.t, state.flag, state.ray_pdf);
path_radiance_accum_emission(&L, throughput, emission, state.bounce);
}
-#endif
+#endif /* __EMISSION__ */
/* path termination. this is a strange place to put the termination, it's
* mainly due to the mixed in MIS that we use. gives too many unneeded
@@ -851,7 +828,6 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
}
else if(probability != 1.0f) {
float terminate = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_TERMINATE);
-
if(terminate >= probability)
break;
@@ -861,9 +837,9 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
#ifdef __AO__
/* ambient occlusion */
if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
- kernel_path_ao(kg, &sd, &emission_sd, &L, &state, rng, throughput);
+ kernel_path_ao(kg, &sd, &emission_sd, &L, &state, rng, throughput, shader_bsdf_alpha(kg, &sd));
}
-#endif
+#endif /* __AO__ */
#ifdef __SUBSURFACE__
/* bssrdf scatter to a different location on the same object, replacing
@@ -918,7 +894,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
#ifdef __KERNEL_DEBUG__
kernel_write_debug_passes(kg, buffer, &state, &debug_data, sample);
-#endif
+#endif /* __KERNEL_DEBUG__ */
return make_float4(L_sum.x, L_sum.y, L_sum.z, 1.0f - L_transparent);
}
diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h
index cdb07db..72a8d98 100644
--- a/intern/cycles/kernel/kernel_path_branched.h
+++ b/intern/cycles/kernel/kernel_path_branched.h
@@ -51,7 +51,7 @@ ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg,
light_ray.t = kernel_data.background.ao_distance;
#ifdef __OBJECT_MOTION__
light_ray.time = ccl_fetch(sd, time);
-#endif
+#endif /* __OBJECT_MOTION__ */
light_ray.dP = ccl_fetch(sd, dP);
light_ray.dD = differential3_zero();
@@ -169,7 +169,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
Ray volume_ray = *ray;
bool need_update_volume_stack = kernel_data.integrator.use_volumes &&
ccl_fetch(sd, flag) & SD_OBJECT_INTERSECTS_VOLUME;
-#endif
+#endif /* __VOLUME__ */
/* compute lighting with the BSDF closure */
for(int hit = 0; hit < num_hits; hit++) {
@@ -200,7 +200,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
&volume_ray,
hit_state.volume_stack);
}
-#endif
+#endif /* __VOLUME__ */
#ifdef __EMISSION__
/* direct light */
@@ -217,7 +217,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
L,
all);
}
-#endif
+#endif /* __EMISSION__ */
/* indirect light */
kernel_branched_path_surface_indirect_light(
@@ -234,7 +234,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
}
}
}
-#endif
+#endif /* __SUBSURFACE__ */
ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, int sample, Ray ray, ccl_global float *buffer)
{
@@ -256,7 +256,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
#ifdef __KERNEL_DEBUG__
DebugData debug_data;
debug_data_init(&debug_data);
-#endif
+#endif /* __KERNEL_DEBUG__ */
/* Main Loop
* Here we only handle transparency intersections from the camera ray.
@@ -285,15 +285,20 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
bool hit = scene_intersect(kg, ray, visibility, &isect, &lcg_state, difl, extmax);
#else
bool hit = scene_intersect(kg, ray, visibility, &isect, NULL, 0.0f, 0.0f);
-#endif
+#endif /* __HAIR__ */
#ifdef __KERNEL_DEBUG__
- debug_data.num_bvh_traversal_steps += isect.num_traversal_steps;
+ debug_data.num_bvh_traversed_nodes += isect.num_traversed_nodes;
debug_data.num_bvh_traversed_instances += isect.num_traversed_instances;
+ debug_data.num_bvh_intersections += isect.num_intersections;
debug_data.num_ray_bounces++;
-#endif
+#endif /* __KERNEL_DEBUG__ */
#ifdef __VOLUME__
+ /* Sanitize volume stack. */
+ if(!hit) {
+ kernel_volume_clean_stack(kg, state.volume_stack);
+ }
/* volume attenuation, emission, scatter */
if(state.volume_stack[0].shader != SHADER_NONE) {
Ray volume_ray = ray;
@@ -432,14 +437,14 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
path_radiance_reset_indirect(&L);
}
}
-#endif
+#endif /* __VOLUME_SCATTER__ */
}
/* todo: avoid this calculation using decoupled ray marching */
kernel_volume_shadow(kg, &emission_sd, &state, &volume_ray, &throughput);
-#endif
+#endif /* __VOLUME_DECOUPLED__ */
}
-#endif
+#endif /* __VOLUME__ */
if(!hit) {
/* eval background shader if nothing hit */
@@ -448,7 +453,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
#ifdef __PASSES__
if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
-#endif
+#endif /* __PASSES__ */
break;
}
@@ -456,7 +461,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
/* sample background shader */
float3 L_background = indirect_background(kg, &emission_sd, &state, &ray);
path_radiance_accum_background(&L, throughput, L_background, state.bounce);
-#endif
+#endif /* __BACKGROUND__ */
break;
}
@@ -484,7 +489,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
if(sd.flag & SD_HOLDOUT_MASK)
break;
}
-#endif
+#endif /* __HOLDOUT__ */
/* holdout mask objects do not write data passes */
kernel_write_data_passes(kg, buffer, &L, &sd, sample, &state, throughput);
@@ -495,7 +500,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
float3 emission = indirect_primitive_emission(kg, &sd, isect.t, state.flag, state.ray_pdf);
path_radiance_accum_emission(&L, throughput, emission, state.bounce);
}
-#endif
+#endif /* __EMISSION__ */
/* transparency termination */
if(state.flag & PATH_RAY_TRANSPARENT) {
@@ -522,7 +527,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
kernel_branched_path_ao(kg, &sd, &emission_sd, &L, &state, rng, throughput);
}
-#endif
+#endif /* __AO__ */
#ifdef __SUBSURFACE__
/* bssrdf scatter to a different location on the same object */
@@ -530,7 +535,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
kernel_branched_path_subsurface_scatter(kg, &sd, &indirect_sd, &emission_sd,
&L, &state, rng, &ray, throughput);
}
-#endif
+#endif /* __SUBSURFACE__ */
if(!(sd.flag & SD_HAS_ONLY_VOLUME)) {
PathState hit_state = state;
@@ -542,7 +547,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
kernel_branched_path_surface_connect_light(kg, rng,
&sd, &emission_sd, &hit_state, throughput, 1.0f, &L, all);
}
-#endif
+#endif /* __EMISSION__ */
/* indirect light */
kernel_branched_path_surface_indirect_light(kg, rng,
@@ -567,12 +572,12 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
ray.dP = sd.dP;
ray.dD.dx = -sd.dI.dx;
ray.dD.dy = -sd.dI.dy;
-#endif
+#endif /* __RAY_DIFFERENTIALS__ */
#ifdef __VOLUME__
/* enter/exit volume */
kernel_volume_stack_enter_exit(kg, &sd, state.volume_stack);
-#endif
+#endif /* __VOLUME__ */
}
float3 L_sum = path_radiance_clamp_and_sum(kg, &L);
@@ -581,7 +586,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
#ifdef __KERNEL_DEBUG__
kernel_write_debug_passes(kg, buffer, &state, &debug_data, sample);
-#endif
+#endif /* __KERNEL_DEBUG__ */
return make_float4(L_sum.x, L_sum.y, L_sum.z, 1.0f - L_transparent);
}
diff --git a/intern/cycles/kernel/kernel_path_common.h b/intern/cycles/kernel/kernel_path_common.h
index 1912dfa..7b90355 100644
--- a/intern/cycles/kernel/kernel_path_common.h
+++ b/intern/cycles/kernel/kernel_path_common.h
@@ -14,6 +14,8 @@
* limitations under the License.
*/
+#include "util_hash.h"
+
CCL_NAMESPACE_BEGIN
ccl_device_inline void kernel_path_trace_setup(KernelGlobals *kg,
@@ -28,6 +30,10 @@ ccl_device_inline void kernel_path_trace_setup(KernelGlobals *kg,
int num_samples = kernel_data.integrator.aa_samples;
+ if(sample == kernel_data.integrator.start_sample) {
+ *rng_state = hash_int_2d(x, y);
+ }
+
path_rng_init(kg, rng_state, sample, num_samples, rng, x, y, &filter_u, &filter_v);
/* sample camera ray */
diff --git a/intern/cycles/kernel/kernel_path_surface.h b/intern/cycles/kernel/kernel_path_surface.h
index 250b8e9..fea503d 100644
--- a/intern/cycles/kernel/kernel_path_surface.h
+++ b/intern/cycles/kernel/kernel_path_surface.h
@@ -46,23 +46,26 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobal
float num_samples_inv = num_samples_adjust/(num_samples*kernel_data.integrator.num_all_lights);
RNG lamp_rng = cmj_hash(*rng, i);
- if(kernel_data.integrator.pdf_triangles != 0.0f)
- num_samples_inv *= 0.5f;
-
for(int j = 0; j < num_samples; j++) {
float light_u, light_v;
path_branched_rng_2D(kg, &lamp_rng, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
+ float terminate = path_branched_rng_light_termination(kg, &lamp_rng, state, j, num_samples);
LightSample ls;
- lamp_light_sample(kg, i, light_u, light_v, ccl_fetch(sd, P), &ls);
-
- if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
- /* trace shadow ray */
- float3 shadow;
-
- if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
- /* accumulate */
- path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
+ if(lamp_light_sample(kg, i, light_u, light_v, ccl_fetch(sd, P), &ls)) {
+				/* The sampling probability returned by lamp_light_sample assumes that all lights were sampled.
+				 * However, this code only samples lamps, so if the scene also has mesh lights, the real probability is twice as high. */
+ if(kernel_data.integrator.pdf_triangles != 0.0f)
+ ls.pdf *= 2.0f;
+
+ if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
+ /* trace shadow ray */
+ float3 shadow;
+
+ if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
+ /* accumulate */
+ path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
+ }
}
}
}
@@ -73,28 +76,30 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobal
int num_samples = ceil_to_int(num_samples_adjust*kernel_data.integrator.mesh_light_samples);
float num_samples_inv = num_samples_adjust/num_samples;
- if(kernel_data.integrator.num_all_lights)
- num_samples_inv *= 0.5f;
-
for(int j = 0; j < num_samples; j++) {
float light_t = path_branched_rng_1D(kg, rng, state, j, num_samples, PRNG_LIGHT);
float light_u, light_v;
path_branched_rng_2D(kg, rng, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
+ float terminate = path_branched_rng_light_termination(kg, rng, state, j, num_samples);
/* only sample triangle lights */
if(kernel_data.integrator.num_all_lights)
light_t = 0.5f*light_t;
LightSample ls;
- light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls);
-
- if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
- /* trace shadow ray */
- float3 shadow;
-
- if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
- /* accumulate */
- path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
+ if(light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls)) {
+			/* Same as above: the probability needs to be corrected since the sampling was forced to select a mesh light. */
+ if(kernel_data.integrator.num_all_lights)
+ ls.pdf *= 2.0f;
+
+ if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
+ /* trace shadow ray */
+ float3 shadow;
+
+ if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
+ /* accumulate */
+ path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
+ }
}
}
}
@@ -105,18 +110,19 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobal
float light_t = path_state_rng_1D(kg, rng, state, PRNG_LIGHT);
float light_u, light_v;
path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v);
+ float terminate = path_state_rng_light_termination(kg, rng, state);
LightSample ls;
- light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls);
-
- /* sample random light */
- if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
- /* trace shadow ray */
- float3 shadow;
-
- if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
- /* accumulate */
- path_radiance_accum_light(L, throughput*num_samples_adjust, &L_light, shadow, num_samples_adjust, state->bounce, is_lamp);
+ if(light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls)) {
+ /* sample random light */
+ if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
+ /* trace shadow ray */
+ float3 shadow;
+
+ if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
+ /* accumulate */
+ path_radiance_accum_light(L, throughput*num_samples_adjust, &L_light, shadow, num_samples_adjust, state->bounce, is_lamp);
+ }
}
}
}
@@ -206,15 +212,16 @@ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ccl_
#endif
LightSample ls;
- light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls);
-
- if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
- /* trace shadow ray */
- float3 shadow;
+ if(light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls)) {
+ float terminate = path_state_rng_light_termination(kg, rng, state);
+ if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
+ /* trace shadow ray */
+ float3 shadow;
- if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
- /* accumulate */
- path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state->bounce, is_lamp);
+ if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
+ /* accumulate */
+ path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state->bounce, is_lamp);
+ }
}
}
#endif
diff --git a/intern/cycles/kernel/kernel_path_volume.h b/intern/cycles/kernel/kernel_path_volume.h
index 5fd4f2f..3d3b738 100644
--- a/intern/cycles/kernel/kernel_path_volume.h
+++ b/intern/cycles/kernel/kernel_path_volume.h
@@ -46,17 +46,17 @@ ccl_device_inline void kernel_path_volume_connect_light(
light_ray.time = sd->time;
# endif
- light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, state->bounce, &ls);
- if(ls.pdf == 0.0f)
- return;
-
- if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
- /* trace shadow ray */
- float3 shadow;
+ if(light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, state->bounce, &ls))
+ {
+ float terminate = path_state_rng_light_termination(kg, rng, state);
+ if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
+ /* trace shadow ray */
+ float3 shadow;
- if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
- /* accumulate */
- path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state->bounce, is_lamp);
+ if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
+ /* accumulate */
+ path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state->bounce, is_lamp);
+ }
}
}
#endif
@@ -137,16 +137,13 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG
float num_samples_inv = 1.0f/(num_samples*kernel_data.integrator.num_all_lights);
RNG lamp_rng = cmj_hash(*rng, i);
- if(kernel_data.integrator.pdf_triangles != 0.0f)
- num_samples_inv *= 0.5f;
-
for(int j = 0; j < num_samples; j++) {
/* sample random position on given light */
float light_u, light_v;
path_branched_rng_2D(kg, &lamp_rng, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
LightSample ls;
- lamp_light_sample(kg, i, light_u, light_v, ray->P, &ls);
+ lamp_light_sample(kg, i, light_u, light_v, ray->P, &ls);
float3 tp = throughput;
@@ -156,23 +153,24 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG
VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? &ls.P: NULL, false);
-
+
(void)result;
kernel_assert(result == VOLUME_PATH_SCATTERED);
/* todo: split up light_sample so we don't have to call it again with new position */
- lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls);
-
- if(ls.pdf == 0.0f)
- continue;
-
- if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
- /* trace shadow ray */
- float3 shadow;
-
- if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
- /* accumulate */
- path_radiance_accum_light(L, tp*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
+ if(lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls)) {
+ if(kernel_data.integrator.pdf_triangles != 0.0f)
+ ls.pdf *= 2.0f;
+
+ float terminate = path_branched_rng_light_termination(kg, rng, state, j, num_samples);
+ if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
+ /* trace shadow ray */
+ float3 shadow;
+
+ if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
+ /* accumulate */
+ path_radiance_accum_light(L, tp*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
+ }
}
}
}
@@ -183,9 +181,6 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG
int num_samples = kernel_data.integrator.mesh_light_samples;
float num_samples_inv = 1.0f/num_samples;
- if(kernel_data.integrator.num_all_lights)
- num_samples_inv *= 0.5f;
-
for(int j = 0; j < num_samples; j++) {
/* sample random position on random triangle */
float light_t = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_LIGHT);
@@ -212,18 +207,19 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG
kernel_assert(result == VOLUME_PATH_SCATTERED);
/* todo: split up light_sample so we don't have to call it again with new position */
- light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, state->bounce, &ls);
-
- if(ls.pdf == 0.0f)
- continue;
-
- if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
- /* trace shadow ray */
- float3 shadow;
-
- if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
- /* accumulate */
- path_radiance_accum_light(L, tp*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
+ if(light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
+ if(kernel_data.integrator.num_all_lights)
+ ls.pdf *= 2.0f;
+
+ float terminate = path_branched_rng_light_termination(kg, rng, state, j, num_samples);
+ if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
+ /* trace shadow ray */
+ float3 shadow;
+
+ if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
+ /* accumulate */
+ path_radiance_accum_light(L, tp*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
+ }
}
}
}
@@ -251,19 +247,17 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG
kernel_assert(result == VOLUME_PATH_SCATTERED);
/* todo: split up light_sample so we don't have to call it again with new position */
- light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, state->bounce, &ls);
-
- if(ls.pdf == 0.0f)
- return;
-
- /* sample random light */
- if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
- /* trace shadow ray */
- float3 shadow;
-
- if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
- /* accumulate */
- path_radiance_accum_light(L, tp, &L_light, shadow, 1.0f, state->bounce, is_lamp);
+ if(light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
+ /* sample random light */
+ float terminate = path_state_rng_light_termination(kg, rng, state);
+ if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
+ /* trace shadow ray */
+ float3 shadow;
+
+ if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
+ /* accumulate */
+ path_radiance_accum_light(L, tp, &L_light, shadow, 1.0f, state->bounce, is_lamp);
+ }
}
}
}
diff --git a/intern/cycles/kernel/kernel_projection.h b/intern/cycles/kernel/kernel_projection.h
index ba714b6..9a2b088 100644
--- a/intern/cycles/kernel/kernel_projection.h
+++ b/intern/cycles/kernel/kernel_projection.h
@@ -235,7 +235,7 @@ ccl_device_inline void spherical_stereo_transform(KernelGlobals *kg, float3 *P,
if(kernel_data.cam.pole_merge_angle_to > 0.0f) {
const float pole_merge_angle_from = kernel_data.cam.pole_merge_angle_from,
pole_merge_angle_to = kernel_data.cam.pole_merge_angle_to;
- float altitude = fabsf(safe_asinf(D->z));
+ float altitude = fabsf(safe_asinf((*D).z));
if(altitude > pole_merge_angle_to) {
interocular_offset = 0.0f;
}
diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h
index 4a76ffd..e773753 100644
--- a/intern/cycles/kernel/kernel_random.h
+++ b/intern/cycles/kernel/kernel_random.h
@@ -120,13 +120,11 @@ ccl_device_forceinline float path_rng_1D(KernelGlobals *kg, ccl_addr_space RNG *
/* Cranley-Patterson rotation using rng seed */
float shift;
- /* using the same *rng value to offset seems to give correlation issues,
- * we could hash it with the dimension but this has a performance impact,
- * we need to find a solution for this */
- if(dimension & 1)
- shift = (*rng >> 16) * (1.0f/(float)0xFFFF);
- else
- shift = (*rng & 0xFFFF) * (1.0f/(float)0xFFFF);
+ /* Hash rng with dimension to solve correlation issues.
+ * See T38710, T50116.
+ */
+ RNG tmp_rng = cmj_hash_simple(dimension, *rng);
+ shift = tmp_rng * (1.0f/(float)0xFFFFFFFF);
return r + shift - floorf(r + shift);
#endif
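For readers unfamiliar with the rotation, a minimal standalone sketch of what the new code computes; the hash below is only a stand-in for cmj_hash_simple(), which is not shown in this hunk:

#include <cmath>
#include <cstdint>

/* Stand-in for the kernel's cmj_hash_simple(); any reasonable integer hash works here. */
static uint32_t hash_u32(uint32_t i, uint32_t p)
{
	i = (i ^ 61u) ^ p;
	i += i << 3;
	i ^= i >> 4;
	i *= 0x27d4eb2du;
	return i;
}

/* Cranley-Patterson rotation: shift the low-discrepancy sample r by a per-dimension
 * offset derived from the pixel's RNG seed, wrapping back into [0, 1). */
static float cp_rotate(float r, uint32_t rng, uint32_t dimension)
{
	float shift = hash_u32(dimension, rng) * (1.0f / 4294967295.0f);
	return r + shift - std::floor(r + shift);
}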
@@ -300,6 +298,23 @@ ccl_device_inline void path_branched_rng_2D(KernelGlobals *kg, ccl_addr_space RN
path_rng_2D(kg, rng, state->sample*num_branches + branch, state->num_samples*num_branches, state->rng_offset + dimension, fx, fy);
}
+/* Utility functions to get the light termination value, since it might not be needed in many cases. */
+ccl_device_inline float path_state_rng_light_termination(KernelGlobals *kg, ccl_addr_space RNG *rng, const ccl_addr_space PathState *state)
+{
+ if(kernel_data.integrator.light_inv_rr_threshold > 0.0f) {
+ return path_state_rng_1D_for_decision(kg, rng, state, PRNG_LIGHT_TERMINATE);
+ }
+ return 0.0f;
+}
+
+ccl_device_inline float path_branched_rng_light_termination(KernelGlobals *kg, ccl_addr_space RNG *rng, const PathState *state, int branch, int num_branches)
+{
+ if(kernel_data.integrator.light_inv_rr_threshold > 0.0f) {
+ return path_branched_rng_1D_for_decision(kg, rng, state, branch, num_branches, PRNG_LIGHT_TERMINATE);
+ }
+ return 0.0f;
+}
+
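The helpers above only draw a termination random number when light_inv_rr_threshold is non-zero. How that number is consumed inside direct_emission() is not visible in this hunk; a hypothetical illustration of the usual pattern (names and weighting are illustrative, not taken from the patch):

/* Keep a dim light sample with probability proportional to its brightness;
 * a surviving sample would then be re-weighted by 1/probability by the caller. */
static bool keep_light_sample(float brightness, float inv_rr_threshold, float terminate)
{
	float probability = brightness * inv_rr_threshold;
	if (probability >= 1.0f)
		return true;                 /* bright enough, always keep */
	return terminate < probability;  /* otherwise Russian roulette */
}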
ccl_device_inline void path_state_branch(PathState *state, int branch, int num_branches)
{
/* path is splitting into a branch, adjust so that each branch
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h
index daf8c20..c1b3153 100644
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -111,7 +111,7 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
ccl_fetch(sd, I) = -ray->D;
- ccl_fetch(sd, flag) |= kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2);
+ ccl_fetch(sd, flag) |= kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*SHADER_SIZE);
#ifdef __INSTANCING__
if(isect->object != OBJECT_NONE) {
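The *2 -> *SHADER_SIZE change here (and in the matching fetches further down), together with the new SHADER_SIZE define and the SD_HAS_CONSTANT_EMISSION flag in kernel_types.h, implies the per-shader record in __shader_flag grew from two ints to five. The layout of the extra slots is not shown in this patch; a plausible reading, stated as an assumption:

/* Hypothetical per-shader record, assuming SHADER_SIZE == 5 and a
 * {flag, pass_id, constant emission r/g/b} layout (inferred, not confirmed here). */
struct ShaderFlagRecord {
	int flag;         /* SD_* shader flags, e.g. SD_HAS_CONSTANT_EMISSION */
	int pass_id;
	int emission[3];  /* constant emission colour, stored as float bit patterns */
};
/* record i would then occupy __shader_flag[i*SHADER_SIZE .. i*SHADER_SIZE + 4] */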
@@ -195,7 +195,7 @@ void shader_setup_from_subsurface(
motion_triangle_shader_setup(kg, sd, isect, ray, true);
}
- sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2);
+ sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE);
# ifdef __INSTANCING__
if(isect->object != OBJECT_NONE) {
@@ -242,7 +242,8 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg,
int shader, int object, int prim,
float u, float v, float t,
float time,
- bool object_space)
+ bool object_space,
+ int lamp)
{
/* vectors */
ccl_fetch(sd, P) = P;
@@ -250,7 +251,12 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg,
ccl_fetch(sd, Ng) = Ng;
ccl_fetch(sd, I) = I;
ccl_fetch(sd, shader) = shader;
- ccl_fetch(sd, type) = (prim == PRIM_NONE)? PRIMITIVE_NONE: PRIMITIVE_TRIANGLE;
+ if(prim != PRIM_NONE)
+ ccl_fetch(sd, type) = PRIMITIVE_TRIANGLE;
+ else if(lamp != LAMP_NONE)
+ ccl_fetch(sd, type) = PRIMITIVE_LAMP;
+ else
+ ccl_fetch(sd, type) = PRIMITIVE_NONE;
/* primitive */
#ifdef __INSTANCING__
@@ -264,18 +270,19 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg,
#endif
ccl_fetch(sd, ray_length) = t;
- ccl_fetch(sd, flag) = kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2);
+ ccl_fetch(sd, flag) = kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*SHADER_SIZE);
if(ccl_fetch(sd, object) != OBJECT_NONE) {
ccl_fetch(sd, flag) |= kernel_tex_fetch(__object_flag, ccl_fetch(sd, object));
#ifdef __OBJECT_MOTION__
shader_setup_object_transforms(kg, sd, time);
+ ccl_fetch(sd, time) = time;
}
-
- ccl_fetch(sd, time) = time;
-#else
- }
+ else if(lamp != LAMP_NONE) {
+ ccl_fetch(sd, ob_tfm) = lamp_fetch_transform(kg, lamp, false);
+ ccl_fetch(sd, ob_itfm) = lamp_fetch_transform(kg, lamp, true);
#endif
+ }
/* transform into world space */
if(object_space) {
@@ -357,7 +364,8 @@ ccl_device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd,
P, Ng, I,
shader, object, prim,
u, v, 0.0f, 0.5f,
- !(kernel_tex_fetch(__object_flag, object) & SD_TRANSFORM_APPLIED));
+ !(kernel_tex_fetch(__object_flag, object) & SD_TRANSFORM_APPLIED),
+ LAMP_NONE);
}
/* ShaderData setup from ray into background */
@@ -370,7 +378,7 @@ ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderDat
ccl_fetch(sd, Ng) = -ray->D;
ccl_fetch(sd, I) = -ray->D;
ccl_fetch(sd, shader) = kernel_data.background.surface_shader;
- ccl_fetch(sd, flag) = kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2);
+ ccl_fetch(sd, flag) = kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*SHADER_SIZE);
#ifdef __OBJECT_MOTION__
ccl_fetch(sd, time) = ray->time;
#endif
@@ -561,7 +569,7 @@ void shader_bsdf_eval(KernelGlobals *kg,
_shader_bsdf_multi_eval(kg, sd, omega_in, &pdf, -1, eval, 0.0f, 0.0f);
if(use_mis) {
float weight = power_heuristic(light_pdf, pdf);
- bsdf_eval_mul(eval, make_float3(weight, weight, weight));
+ bsdf_eval_mul(eval, weight);
}
}
}
@@ -1027,7 +1035,7 @@ ccl_device_inline void shader_eval_volume(KernelGlobals *kg,
sd->shader = stack[i].shader;
sd->flag &= ~(SD_SHADER_FLAGS|SD_OBJECT_FLAGS);
- sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2);
+ sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE);
if(sd->object != OBJECT_NONE) {
sd->flag |= kernel_tex_fetch(__object_flag, sd->object);
@@ -1100,7 +1108,7 @@ ccl_device bool shader_transparent_shadow(KernelGlobals *kg, Intersection *isect
shader = __float_as_int(str.z);
}
#endif
- int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*2);
+ int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*SHADER_SIZE);
return (flag & SD_HAS_TRANSPARENT_SHADOW) != 0;
}
diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h
index 169f03f..2981f6a 100644
--- a/intern/cycles/kernel/kernel_shadow.h
+++ b/intern/cycles/kernel/kernel_shadow.h
@@ -125,14 +125,14 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, ShaderData *shadow_sd,
/* stop if all light is blocked */
if(is_zero(throughput)) {
- /* free dynamic storage */
return true;
}
/* move ray forward */
ray->P = shadow_sd->P;
- if(ray->t != FLT_MAX)
+ if(ray->t != FLT_MAX) {
ray->D = normalize_len(Pend - ray->P, &ray->t);
+ }
#ifdef __VOLUME__
/* exit/enter volume */
@@ -234,8 +234,9 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
return false;
}
- if(!shader_transparent_shadow(kg, isect))
+ if(!shader_transparent_shadow(kg, isect)) {
return true;
+ }
#ifdef __VOLUME__
/* attenuation between last surface and next surface */
@@ -258,13 +259,16 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
throughput *= shader_bsdf_transparency(kg, shadow_sd);
}
- if(is_zero(throughput))
+ /* stop if all light is blocked */
+ if(is_zero(throughput)) {
return true;
+ }
/* move ray forward */
ray->P = ray_offset(ccl_fetch(shadow_sd, P), -ccl_fetch(shadow_sd, Ng));
- if(ray->t != FLT_MAX)
+ if(ray->t != FLT_MAX) {
ray->D = normalize_len(Pend - ray->P, &ray->t);
+ }
#ifdef __VOLUME__
/* exit/enter volume */
diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h
index 41bc9db..52c05b8 100644
--- a/intern/cycles/kernel/kernel_subsurface.h
+++ b/intern/cycles/kernel/kernel_subsurface.h
@@ -220,7 +220,7 @@ ccl_device void subsurface_color_bump_blur(KernelGlobals *kg,
*/
ccl_device_inline int subsurface_scatter_multi_intersect(
KernelGlobals *kg,
- SubsurfaceIntersection* ss_isect,
+ SubsurfaceIntersection *ss_isect,
ShaderData *sd,
ShaderClosure *sc,
uint *lcg_state,
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 1156b41..358db9e 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -37,11 +37,12 @@ CCL_NAMESPACE_BEGIN
/* constants */
#define OBJECT_SIZE 12
#define OBJECT_VECTOR_SIZE 6
-#define LIGHT_SIZE 5
+#define LIGHT_SIZE 11
#define FILTER_TABLE_SIZE 1024
#define RAMP_TABLE_SIZE 256
#define SHUTTER_TABLE_SIZE 256
#define PARTICLE_SIZE 5
+#define SHADER_SIZE 5
#define BSSRDF_MIN_RADIUS 1e-8f
#define BSSRDF_MAX_HITS 4
@@ -191,6 +192,9 @@ CCL_NAMESPACE_BEGIN
#ifdef __NO_PATCH_EVAL__
# undef __PATCH_EVAL__
#endif
+#ifdef __NO_TRANSPARENT__
+# undef __TRANSPARENT_SHADOWS__
+#endif
/* Random Numbers */
@@ -249,7 +253,7 @@ enum PathTraceDimension {
PRNG_LIGHT = 3,
PRNG_LIGHT_U = 4,
PRNG_LIGHT_V = 5,
- PRNG_UNUSED_3 = 6,
+ PRNG_LIGHT_TERMINATE = 6,
PRNG_TERMINATE = 7,
#ifdef __VOLUME__
@@ -341,9 +345,10 @@ typedef enum PassType {
PASS_SUBSURFACE_COLOR = (1 << 24),
PASS_LIGHT = (1 << 25), /* no real pass, used to force use_light_pass */
#ifdef __KERNEL_DEBUG__
- PASS_BVH_TRAVERSAL_STEPS = (1 << 26),
+ PASS_BVH_TRAVERSED_NODES = (1 << 26),
PASS_BVH_TRAVERSED_INSTANCES = (1 << 27),
- PASS_RAY_BOUNCES = (1 << 28),
+ PASS_BVH_INTERSECTIONS = (1 << 28),
+ PASS_RAY_BOUNCES = (1 << 29),
#endif
} PassType;
@@ -538,33 +543,38 @@ typedef ccl_addr_space struct Intersection {
int type;
#ifdef __KERNEL_DEBUG__
- int num_traversal_steps;
+ int num_traversed_nodes;
int num_traversed_instances;
+ int num_intersections;
#endif
} Intersection;
/* Primitives */
typedef enum PrimitiveType {
- PRIMITIVE_NONE = 0,
- PRIMITIVE_TRIANGLE = 1,
- PRIMITIVE_MOTION_TRIANGLE = 2,
- PRIMITIVE_CURVE = 4,
- PRIMITIVE_MOTION_CURVE = 8,
+ PRIMITIVE_NONE = 0,
+ PRIMITIVE_TRIANGLE = (1 << 0),
+ PRIMITIVE_MOTION_TRIANGLE = (1 << 1),
+ PRIMITIVE_CURVE = (1 << 2),
+ PRIMITIVE_MOTION_CURVE = (1 << 3),
+ /* Lamp primitive is not included below on purpose,
+ * since it is not a real traceable primitive.
+ */
+ PRIMITIVE_LAMP = (1 << 4),
PRIMITIVE_ALL_TRIANGLE = (PRIMITIVE_TRIANGLE|PRIMITIVE_MOTION_TRIANGLE),
PRIMITIVE_ALL_CURVE = (PRIMITIVE_CURVE|PRIMITIVE_MOTION_CURVE),
PRIMITIVE_ALL_MOTION = (PRIMITIVE_MOTION_TRIANGLE|PRIMITIVE_MOTION_CURVE),
PRIMITIVE_ALL = (PRIMITIVE_ALL_TRIANGLE|PRIMITIVE_ALL_CURVE),
- /* Total number of different primitives.
+ /* Total number of different traceable primitives.
* NOTE: This is an actual value, not a bitflag.
*/
PRIMITIVE_NUM_TOTAL = 4,
} PrimitiveType;
-#define PRIMITIVE_PACK_SEGMENT(type, segment) ((segment << 16) | type)
-#define PRIMITIVE_UNPACK_SEGMENT(type) (type >> 16)
+#define PRIMITIVE_PACK_SEGMENT(type, segment) ((segment << PRIMITIVE_NUM_TOTAL) | (type))
+#define PRIMITIVE_UNPACK_SEGMENT(type) (type >> PRIMITIVE_NUM_TOTAL)
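A quick worked example of the new packing, using the enum values above: PRIMITIVE_PACK_SEGMENT(PRIMITIVE_CURVE, 3) = (3 << 4) | (1 << 2) = 0x34, and PRIMITIVE_UNPACK_SEGMENT(0x34) = 0x34 >> 4 = 3. The curve segment index now starts at bit PRIMITIVE_NUM_TOTAL (4) rather than bit 16, leaving the low four bits for the traceable primitive flags.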
/* Attributes */
@@ -714,20 +724,21 @@ enum ShaderDataFlag {
SD_VOLUME_CUBIC = (1 << 20), /* use cubic interpolation for voxels */
SD_HAS_BUMP = (1 << 21), /* has data connected to the displacement input */
SD_HAS_DISPLACEMENT = (1 << 22), /* has true displacement */
+ SD_HAS_CONSTANT_EMISSION = (1 << 23), /* has constant emission (value stored in __shader_flag) */
SD_SHADER_FLAGS = (SD_USE_MIS|SD_HAS_TRANSPARENT_SHADOW|SD_HAS_VOLUME|
SD_HAS_ONLY_VOLUME|SD_HETEROGENEOUS_VOLUME|
SD_HAS_BSSRDF_BUMP|SD_VOLUME_EQUIANGULAR|SD_VOLUME_MIS|
- SD_VOLUME_CUBIC|SD_HAS_BUMP|SD_HAS_DISPLACEMENT),
+ SD_VOLUME_CUBIC|SD_HAS_BUMP|SD_HAS_DISPLACEMENT|SD_HAS_CONSTANT_EMISSION),
/* object flags */
- SD_HOLDOUT_MASK = (1 << 23), /* holdout for camera rays */
- SD_OBJECT_MOTION = (1 << 24), /* has object motion blur */
- SD_TRANSFORM_APPLIED = (1 << 25), /* vertices have transform applied */
- SD_NEGATIVE_SCALE_APPLIED = (1 << 26), /* vertices have negative scale applied */
- SD_OBJECT_HAS_VOLUME = (1 << 27), /* object has a volume shader */
- SD_OBJECT_INTERSECTS_VOLUME = (1 << 28), /* object intersects AABB of an object with volume shader */
- SD_OBJECT_HAS_VERTEX_MOTION = (1 << 29), /* has position for motion vertices */
+ SD_HOLDOUT_MASK = (1 << 24), /* holdout for camera rays */
+ SD_OBJECT_MOTION = (1 << 25), /* has object motion blur */
+ SD_TRANSFORM_APPLIED = (1 << 26), /* vertices have transform applied */
+ SD_NEGATIVE_SCALE_APPLIED = (1 << 27), /* vertices have negative scale applied */
+ SD_OBJECT_HAS_VOLUME = (1 << 28), /* object has a volume shader */
+ SD_OBJECT_INTERSECTS_VOLUME = (1 << 29), /* object intersects AABB of an object with volume shader */
+ SD_OBJECT_HAS_VERTEX_MOTION = (1 << 30), /* has position for motion vertices */
SD_OBJECT_FLAGS = (SD_HOLDOUT_MASK|SD_OBJECT_MOTION|SD_TRANSFORM_APPLIED|
SD_NEGATIVE_SCALE_APPLIED|SD_OBJECT_HAS_VOLUME|
@@ -827,7 +838,7 @@ typedef ccl_addr_space struct ShaderData {
ccl_soa_member(differential3, ray_dP);
#ifdef __OSL__
- struct KernelGlobals * osl_globals;
+ struct KernelGlobals *osl_globals;
struct PathState *osl_path_state;
#endif
} ShaderData;
@@ -1031,10 +1042,10 @@ typedef struct KernelFilm {
float mist_falloff;
#ifdef __KERNEL_DEBUG__
- int pass_bvh_traversal_steps;
+ int pass_bvh_traversed_nodes;
int pass_bvh_traversed_instances;
+ int pass_bvh_intersections;
int pass_ray_bounces;
- int pass_pad3;
#endif
} KernelFilm;
static_assert_align(KernelFilm, 16);
@@ -1119,8 +1130,9 @@ typedef struct KernelIntegrator {
float volume_step_size;
int volume_samples;
- int pad1;
- int pad2;
+ float light_inv_rr_threshold;
+
+ int start_sample;
} KernelIntegrator;
static_assert_align(KernelIntegrator, 16);
@@ -1178,10 +1190,9 @@ static_assert_align(KernelData, 16);
* really important here.
*/
typedef ccl_addr_space struct DebugData {
- // Total number of BVH node traversal steps and primitives intersections
- // for the camera rays.
- int num_bvh_traversal_steps;
+ int num_bvh_traversed_nodes;
int num_bvh_traversed_instances;
+ int num_bvh_intersections;
int num_ray_bounces;
} DebugData;
#endif
diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h
index 4ab51b8..c7cb29b 100644
--- a/intern/cycles/kernel/kernel_volume.h
+++ b/intern/cycles/kernel/kernel_volume.h
@@ -115,7 +115,7 @@ ccl_device float kernel_volume_channel_get(float3 value, int channel)
ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, VolumeStack *stack)
{
for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
- int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*2);
+ int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*SHADER_SIZE);
if(shader_flag & SD_HETEROGENEOUS_VOLUME)
return true;
@@ -132,7 +132,7 @@ ccl_device int volume_stack_sampling_method(KernelGlobals *kg, VolumeStack *stac
int method = -1;
for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
- int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*2);
+ int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*SHADER_SIZE);
if(shader_flag & SD_VOLUME_MIS) {
return SD_VOLUME_MIS;
@@ -245,11 +245,18 @@ ccl_device float kernel_volume_equiangular_sample(Ray *ray, float3 light_P, floa
float t = ray->t;
float delta = dot((light_P - ray->P) , ray->D);
- float D = sqrtf(len_squared(light_P - ray->P) - delta * delta);
+ float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
+ if(UNLIKELY(D == 0.0f)) {
+ *pdf = 0.0f;
+ return 0.0f;
+ }
float theta_a = -atan2f(delta, D);
float theta_b = atan2f(t - delta, D);
float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a);
-
+ if(UNLIKELY(theta_b == theta_a)) {
+ *pdf = 0.0f;
+ return 0.0f;
+ }
*pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
return min(t, delta + t_); /* min is only for float precision errors */
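The two guards added above protect standard equiangular sampling against degenerate geometry. Written out with the code's symbols:

t(xi) = D * tan((1 - xi)*theta_a + xi*theta_b)
pdf(t) = D / ((theta_b - theta_a) * (D*D + t*t))

Both expressions degenerate when D == 0 (the light lies on the ray) or theta_a == theta_b (a zero-length angular range), which is exactly when the code now returns a zero pdf instead of dividing by zero.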
@@ -258,13 +265,19 @@ ccl_device float kernel_volume_equiangular_sample(Ray *ray, float3 light_P, floa
ccl_device float kernel_volume_equiangular_pdf(Ray *ray, float3 light_P, float sample_t)
{
float delta = dot((light_P - ray->P) , ray->D);
- float D = sqrtf(len_squared(light_P - ray->P) - delta * delta);
+ float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
+ if(UNLIKELY(D == 0.0f)) {
+ return 0.0f;
+ }
float t = ray->t;
float t_ = sample_t - delta;
float theta_a = -atan2f(delta, D);
float theta_b = atan2f(t - delta, D);
+ if(UNLIKELY(theta_b == theta_a)) {
+ return 0.0f;
+ }
float pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
@@ -569,17 +582,12 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(
ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals *kg,
PathState *state, ShaderData *sd, Ray *ray, PathRadiance *L, float3 *throughput, RNG *rng, bool heterogeneous)
{
- /* workaround to fix correlation bug in T38710, can find better solution
- * in random number generator later, for now this is done here to not impact
- * performance of rendering without volumes */
- RNG tmp_rng = cmj_hash(*rng, state->rng_offset);
-
shader_setup_from_volume(kg, sd, ray);
if(heterogeneous)
- return kernel_volume_integrate_heterogeneous_distance(kg, state, ray, sd, L, throughput, &tmp_rng);
+ return kernel_volume_integrate_heterogeneous_distance(kg, state, ray, sd, L, throughput, rng);
else
- return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, &tmp_rng, true);
+ return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, rng, true);
}
/* Decoupled Volume Sampling
@@ -958,6 +966,9 @@ ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
mis_weight = 2.0f*power_heuristic(pdf, distance_pdf);
}
}
+ if(sample_t < 1e-6f) {
+ return VOLUME_PATH_SCATTERED;
+ }
/* compute transmittance up to this step */
if(step != segment->steps)
@@ -1251,4 +1262,30 @@ ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
}
#endif
+/* Clean stack after the last bounce.
+ *
+ * It is expected that all volumes are closed manifolds, so at the time when ray
+ * hits nothing (for example, it is a last bounce which goes to environment) the
+ * only expected volume in the stack is the world's one. All the rest volume
+ * entries should have been exited already.
+ *
+ * This isn't always true because of ray intersection precision issues, which
+ * could lead us to an infinite non-world volume in the stack, causing render
+ * artifacts.
+ *
+ * Use this function after the last bounce to get rid of all volumes apart from
+ * the world's one, to avoid render artifacts.
+ */
+ccl_device_inline void kernel_volume_clean_stack(KernelGlobals *kg,
+ VolumeStack *volume_stack)
+{
+ if(kernel_data.background.volume_shader != SHADER_NONE) {
+ /* Keep the world's volume in stack. */
+ volume_stack[1].shader = SHADER_NONE;
+ }
+ else {
+ volume_stack[0].shader = SHADER_NONE;
+ }
+}
+
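How the new helper is wired into the integrators is not part of this hunk; a hypothetical call site, only to illustrate the intended usage (the fields referenced are illustrative):

/* Once this is known to be the final bounce, drop any volumes that precision
 * issues left on the stack before the ray is sent off to the environment. */
if (state->bounce >= kernel_data.integrator.max_bounce) {
	kernel_volume_clean_stack(kg, state->volume_stack);
}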
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernels/cpu/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp
index f11c85d..72dbbd9 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp
@@ -42,9 +42,11 @@
# define __KERNEL_SSE41__
# endif
# ifdef __AVX__
+# define __KERNEL_SSE__
# define __KERNEL_AVX__
# endif
# ifdef __AVX2__
+# define __KERNEL_SSE__
# define __KERNEL_AVX2__
# endif
#endif
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp
index 533ab46..1350d9e 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp
@@ -20,6 +20,7 @@
/* SSE optimization disabled for now on 32 bit, see bug #36316 */
#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
+# define __KERNEL_SSE__
# define __KERNEL_SSE2__
# define __KERNEL_SSE3__
# define __KERNEL_SSSE3__
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp
index 7351e2b..1a416e7 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp
@@ -20,6 +20,7 @@
/* SSE optimization disabled for now on 32 bit, see bug #36316 */
#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
+# define __KERNEL_SSE__
# define __KERNEL_SSE2__
# define __KERNEL_SSE3__
# define __KERNEL_SSSE3__
diff --git a/intern/cycles/kernel/kernels/opencl/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl
index 37907cd..a68f978 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel.cl
@@ -20,6 +20,7 @@
#include "../../kernel_math.h"
#include "../../kernel_types.h"
#include "../../kernel_globals.h"
+#include "../../kernel_image_opencl.h"
#include "../../kernel_film.h"
diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp
index 0f3edcb..eeccf9a 100644
--- a/intern/cycles/kernel/osl/osl_services.cpp
+++ b/intern/cycles/kernel/osl/osl_services.cpp
@@ -102,6 +102,8 @@ ustring OSLRenderServices::u_curve_tangent_normal("geom:curve_tangent_normal");
#endif
ustring OSLRenderServices::u_path_ray_length("path:ray_length");
ustring OSLRenderServices::u_path_ray_depth("path:ray_depth");
+ustring OSLRenderServices::u_path_diffuse_depth("path:diffuse_depth");
+ustring OSLRenderServices::u_path_glossy_depth("path:glossy_depth");
ustring OSLRenderServices::u_path_transparent_depth("path:transparent_depth");
ustring OSLRenderServices::u_path_transmission_depth("path:transmission_depth");
ustring OSLRenderServices::u_trace("trace");
@@ -168,6 +170,12 @@ bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result
return true;
}
+ else if(sd->type == PRIMITIVE_LAMP) {
+ Transform tfm = transform_transpose(sd->ob_tfm);
+ COPY_MATRIX44(&result, &tfm);
+
+ return true;
+ }
}
return false;
@@ -198,6 +206,12 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44
return true;
}
+ else if(sd->type == PRIMITIVE_LAMP) {
+ Transform tfm = transform_transpose(sd->ob_itfm);
+ COPY_MATRIX44(&result, &tfm);
+
+ return true;
+ }
}
return false;
@@ -287,6 +301,12 @@ bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result
return true;
}
+ else if(sd->type == PRIMITIVE_LAMP) {
+ Transform tfm = transform_transpose(sd->ob_tfm);
+ COPY_MATRIX44(&result, &tfm);
+
+ return true;
+ }
}
return false;
@@ -312,6 +332,12 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44
return true;
}
+ else if(sd->type == PRIMITIVE_LAMP) {
+ Transform tfm = transform_transpose(sd->ob_itfm);
+ COPY_MATRIX44(&result, &tfm);
+
+ return true;
+ }
}
return false;
@@ -735,6 +761,24 @@ bool OSLRenderServices::get_background_attribute(KernelGlobals *kg, ShaderData *
int f = state->bounce;
return set_attribute_int(f, type, derivatives, val);
}
+ else if(name == u_path_diffuse_depth) {
+ /* Diffuse Ray Depth */
+ PathState *state = sd->osl_path_state;
+ int f = state->diffuse_bounce;
+ return set_attribute_int(f, type, derivatives, val);
+ }
+ else if(name == u_path_glossy_depth) {
+ /* Glossy Ray Depth */
+ PathState *state = sd->osl_path_state;
+ int f = state->glossy_bounce;
+ return set_attribute_int(f, type, derivatives, val);
+ }
+ else if(name == u_path_transmission_depth) {
+ /* Transmission Ray Depth */
+ PathState *state = sd->osl_path_state;
+ int f = state->transmission_bounce;
+ return set_attribute_int(f, type, derivatives, val);
+ }
else if(name == u_path_transparent_depth) {
/* Transparent Ray Depth */
PathState *state = sd->osl_path_state;
diff --git a/intern/cycles/kernel/osl/osl_services.h b/intern/cycles/kernel/osl/osl_services.h
index 0f2e02c..ec34ca7 100644
--- a/intern/cycles/kernel/osl/osl_services.h
+++ b/intern/cycles/kernel/osl/osl_services.h
@@ -165,6 +165,8 @@ public:
static ustring u_curve_tangent_normal;
static ustring u_path_ray_length;
static ustring u_path_ray_depth;
+ static ustring u_path_diffuse_depth;
+ static ustring u_path_glossy_depth;
static ustring u_path_transparent_depth;
static ustring u_path_transmission_depth;
static ustring u_trace;
diff --git a/intern/cycles/kernel/shaders/node_brick_texture.osl b/intern/cycles/kernel/shaders/node_brick_texture.osl
index d5e0a7d..c303594 100644
--- a/intern/cycles/kernel/shaders/node_brick_texture.osl
+++ b/intern/cycles/kernel/shaders/node_brick_texture.osl
@@ -28,7 +28,7 @@ float brick_noise(int n) /* fast integer noise */
return 0.5 * ((float)nn / 1073741824.0);
}
-float brick(point p, float mortar_size, float bias,
+float brick(point p, float mortar_size, float mortar_smooth, float bias,
float BrickWidth, float row_height, float offset_amount, int offset_frequency,
float squash_amount, int squash_frequency, float tint)
{
@@ -51,9 +51,17 @@ float brick(point p, float mortar_size, float bias,
tint = clamp((brick_noise((rownum << 16) + (bricknum & 65535)) + bias), 0.0, 1.0);
- return (x < mortar_size || y < mortar_size ||
- x > (brick_width - mortar_size) ||
- y > (row_height - mortar_size)) ? 1.0 : 0.0;
+ float min_dist = min(min(x, y), min(brick_width - x, row_height - y));
+ if(min_dist >= mortar_size) {
+ return 0.0;
+ }
+ else if(mortar_smooth == 0.0) {
+ return 1.0;
+ }
+ else {
+ min_dist = 1.0 - min_dist/mortar_size;
+ return smoothstep(0.0, mortar_smooth, min_dist);
+ }
}
shader node_brick_texture(
@@ -69,6 +77,7 @@ shader node_brick_texture(
color Mortar = 0.0,
float Scale = 5.0,
float MortarSize = 0.02,
+ float MortarSmooth = 0.0,
float Bias = 0.0,
float BrickWidth = 0.5,
float RowHeight = 0.25,
@@ -83,7 +92,7 @@ shader node_brick_texture(
float tint = 0.0;
color Col = Color1;
- Fac = brick(p * Scale, MortarSize, Bias, BrickWidth, RowHeight,
+ Fac = brick(p * Scale, MortarSize, MortarSmooth, Bias, BrickWidth, RowHeight,
offset, offset_frequency, squash, squash_frequency, tint);
if (Fac != 1.0) {
@@ -91,6 +100,6 @@ shader node_brick_texture(
Col = facm * Color1 + tint * Color2;
}
- Color = (Fac == 1.0) ? Mortar : Col;
+ Color = mix(Col, Mortar, Fac);
}
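For reference, the new mortar factor can be written as a small standalone helper; this is a sketch mirroring the OSL above (and the SVM version later in the patch), not part of the change itself:

#include <algorithm>

static float mortar_factor(float min_dist, float mortar_size, float mortar_smooth)
{
	if (min_dist >= mortar_size)
		return 0.0f;                              /* inside the brick: no mortar */
	if (mortar_smooth == 0.0f)
		return 1.0f;                              /* old behaviour: hard mortar edge */
	float d = 1.0f - min_dist / mortar_size;      /* 0 at the inner edge of the mortar band, 1 at the seam */
	float t = std::min(std::max(d / mortar_smooth, 0.0f), 1.0f);
	return t * t * (3.0f - 2.0f * t);             /* smoothstep(0, mortar_smooth, d) */
}

Fac then feeds mix(Col, Mortar, Fac), so the mortar colour fades in over MortarSmooth instead of switching on at a hard threshold.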
diff --git a/intern/cycles/kernel/shaders/node_light_path.osl b/intern/cycles/kernel/shaders/node_light_path.osl
index a021a40..64fe4c2 100644
--- a/intern/cycles/kernel/shaders/node_light_path.osl
+++ b/intern/cycles/kernel/shaders/node_light_path.osl
@@ -27,6 +27,8 @@ shader node_light_path(
output float IsVolumeScatterRay = 0.0,
output float RayLength = 0.0,
output float RayDepth = 0.0,
+ output float DiffuseDepth = 0.0,
+ output float GlossyDepth = 0.0,
output float TransparentDepth = 0.0,
output float TransmissionDepth = 0.0)
{
@@ -45,6 +47,14 @@ shader node_light_path(
getattribute("path:ray_depth", ray_depth);
RayDepth = (float)ray_depth;
+ int diffuse_depth;
+ getattribute("path:diffuse_depth", diffuse_depth);
+ DiffuseDepth = (float)diffuse_depth;
+
+ int glossy_depth;
+ getattribute("path:glossy_depth", glossy_depth);
+ GlossyDepth = (float)glossy_depth;
+
int transparent_depth;
getattribute("path:transparent_depth", transparent_depth);
TransparentDepth = (float)transparent_depth;
diff --git a/intern/cycles/kernel/split/kernel_background_buffer_update.h b/intern/cycles/kernel/split/kernel_background_buffer_update.h
index f42d0a9..9bfa71c 100644
--- a/intern/cycles/kernel/split/kernel_background_buffer_update.h
+++ b/intern/cycles/kernel/split/kernel_background_buffer_update.h
@@ -232,7 +232,8 @@ ccl_device char kernel_background_buffer_update(
#endif
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
enqueue_flag = 1;
- } else {
+ }
+ else {
/* These rays do not participate in path-iteration. */
float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
/* Accumulate result in output buffer. */
diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h
index e3dbc43..6e158d5 100644
--- a/intern/cycles/kernel/split/kernel_data_init.h
+++ b/intern/cycles/kernel/split/kernel_data_init.h
@@ -215,7 +215,8 @@ ccl_device void kernel_data_init(
#ifdef __KERNEL_DEBUG__
debug_data_init(&debugdata_coop[ray_index]);
#endif
- } else {
+ }
+ else {
/* These rays do not participate in path-iteration. */
float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
/* Accumulate result in output buffer. */
diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h
index ebe9109..82ca188 100644
--- a/intern/cycles/kernel/split/kernel_direct_lighting.h
+++ b/intern/cycles/kernel/split/kernel_direct_lighting.h
@@ -72,32 +72,34 @@ ccl_device char kernel_direct_lighting(
float light_t = path_state_rng_1D(kg, rng, state, PRNG_LIGHT);
float light_u, light_v;
path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v);
+ float terminate = path_state_rng_light_termination(kg, rng, state);
LightSample ls;
- light_sample(kg,
- light_t, light_u, light_v,
- ccl_fetch(sd, time),
- ccl_fetch(sd, P),
- state->bounce,
- &ls);
+ if(light_sample(kg,
+ light_t, light_u, light_v,
+ ccl_fetch(sd, time),
+ ccl_fetch(sd, P),
+ state->bounce,
+ &ls)) {
- Ray light_ray;
+ Ray light_ray;
#ifdef __OBJECT_MOTION__
- light_ray.time = ccl_fetch(sd, time);
+ light_ray.time = ccl_fetch(sd, time);
#endif
- BsdfEval L_light;
- bool is_lamp;
- if(direct_emission(kg, sd, kg->sd_input, &ls, state, &light_ray, &L_light, &is_lamp)) {
- /* Write intermediate data to global memory to access from
- * the next kernel.
- */
- LightRay_coop[ray_index] = light_ray;
- BSDFEval_coop[ray_index] = L_light;
- ISLamp_coop[ray_index] = is_lamp;
- /* Mark ray state for next shadow kernel. */
- ADD_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL);
- enqueue_flag = 1;
+ BsdfEval L_light;
+ bool is_lamp;
+ if(direct_emission(kg, sd, kg->sd_input, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
+ /* Write intermediate data to global memory to access from
+ * the next kernel.
+ */
+ LightRay_coop[ray_index] = light_ray;
+ BSDFEval_coop[ray_index] = L_light;
+ ISLamp_coop[ray_index] = is_lamp;
+ /* Mark ray state for next shadow kernel. */
+ ADD_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL);
+ enqueue_flag = 1;
+ }
}
}
#endif /* __EMISSION__ */
diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
index 78dada8..435d117 100644
--- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
+++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
@@ -212,7 +212,8 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
if(terminate >= probability) {
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
*enqueue_flag = 1;
- } else {
+ }
+ else {
throughput_coop[ray_index] = throughput/probability;
}
}
diff --git a/intern/cycles/kernel/split/kernel_next_iteration_setup.h b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
index 74da80b..816f3a6 100644
--- a/intern/cycles/kernel/split/kernel_next_iteration_setup.h
+++ b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
@@ -126,7 +126,7 @@ ccl_device char kernel_next_iteration_setup(
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
ccl_global float3 *throughput = &throughput_coop[ray_index];
ccl_global Ray *ray = &Ray_coop[ray_index];
- ccl_global RNG* rng = &rng_coop[ray_index];
+ ccl_global RNG *rng = &rng_coop[ray_index];
state = &PathState_coop[ray_index];
L = &PathRadiance_coop[ray_index];
diff --git a/intern/cycles/kernel/split/kernel_scene_intersect.h b/intern/cycles/kernel/split/kernel_scene_intersect.h
index fc4b4ee..2388580 100644
--- a/intern/cycles/kernel/split/kernel_scene_intersect.h
+++ b/intern/cycles/kernel/split/kernel_scene_intersect.h
@@ -116,8 +116,9 @@ ccl_device void kernel_scene_intersect(
#ifdef __KERNEL_DEBUG__
if(state.flag & PATH_RAY_CAMERA) {
- debug_data->num_bvh_traversal_steps += isect->num_traversal_steps;
+ debug_data->num_bvh_traversed_nodes += isect->num_traversed_nodes;
debug_data->num_bvh_traversed_instances += isect->num_traversed_instances;
+ debug_data->num_bvh_intersections += isect->num_intersections;
}
debug_data->num_ray_bounces++;
#endif
diff --git a/intern/cycles/kernel/split/kernel_split_common.h b/intern/cycles/kernel/split/kernel_split_common.h
index 88d6dab..2135ee2 100644
--- a/intern/cycles/kernel/split/kernel_split_common.h
+++ b/intern/cycles/kernel/split/kernel_split_common.h
@@ -21,6 +21,7 @@
#include "kernel_math.h"
#include "kernel_types.h"
#include "kernel_globals.h"
+#include "kernel_image_opencl.h"
#include "util_atomic.h"
diff --git a/intern/cycles/kernel/svm/svm_brick.h b/intern/cycles/kernel/svm/svm_brick.h
index 9b0cf5a..14245cf 100644
--- a/intern/cycles/kernel/svm/svm_brick.h
+++ b/intern/cycles/kernel/svm/svm_brick.h
@@ -27,7 +27,7 @@ ccl_device_noinline float brick_noise(int n) /* fast integer noise */
return 0.5f * ((float)nn / 1073741824.0f);
}
-ccl_device_noinline float2 svm_brick(float3 p, float mortar_size, float bias,
+ccl_device_noinline float2 svm_brick(float3 p, float mortar_size, float mortar_smooth, float bias,
float brick_width, float row_height, float offset_amount, int offset_frequency,
float squash_amount, int squash_frequency)
{
@@ -47,30 +47,41 @@ ccl_device_noinline float2 svm_brick(float3 p, float mortar_size, float bias,
x = (p.x+offset) - brick_width*bricknum;
y = p.y - row_height*rownum;
- return make_float2(
- saturate((brick_noise((rownum << 16) + (bricknum & 0xFFFF)) + bias)),
+ float tint = saturate((brick_noise((rownum << 16) + (bricknum & 0xFFFF)) + bias));
+ float min_dist = min(min(x, y), min(brick_width - x, row_height - y));
- (x < mortar_size || y < mortar_size ||
- x > (brick_width - mortar_size) ||
- y > (row_height - mortar_size)) ? 1.0f : 0.0f);
+ float mortar;
+ if(min_dist >= mortar_size) {
+ mortar = 0.0f;
+ }
+ else if(mortar_smooth == 0.0f) {
+ mortar = 1.0f;
+ }
+ else {
+ min_dist = 1.0f - min_dist/mortar_size;
+ mortar = (min_dist < mortar_smooth)? smoothstepf(min_dist / mortar_smooth) : 1.0f;
+ }
+
+ return make_float2(tint, mortar);
}
ccl_device void svm_node_tex_brick(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
{
uint4 node2 = read_node(kg, offset);
uint4 node3 = read_node(kg, offset);
+ uint4 node4 = read_node(kg, offset);
/* Input and Output Sockets */
uint co_offset, color1_offset, color2_offset, mortar_offset, scale_offset;
uint mortar_size_offset, bias_offset, brick_width_offset, row_height_offset;
- uint color_offset, fac_offset;
+ uint color_offset, fac_offset, mortar_smooth_offset;
/* RNA properties */
uint offset_frequency, squash_frequency;
decode_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &mortar_offset);
decode_node_uchar4(node.z, &scale_offset, &mortar_size_offset, &bias_offset, &brick_width_offset);
- decode_node_uchar4(node.w, &row_height_offset, &color_offset, &fac_offset, NULL);
+ decode_node_uchar4(node.w, &row_height_offset, &color_offset, &fac_offset, &mortar_smooth_offset);
decode_node_uchar4(node2.x, &offset_frequency, &squash_frequency, NULL, NULL);
@@ -82,13 +93,14 @@ ccl_device void svm_node_tex_brick(KernelGlobals *kg, ShaderData *sd, float *sta
float scale = stack_load_float_default(stack, scale_offset, node2.y);
float mortar_size = stack_load_float_default(stack, mortar_size_offset, node2.z);
+ float mortar_smooth = stack_load_float_default(stack, mortar_smooth_offset, node4.x);
float bias = stack_load_float_default(stack, bias_offset, node2.w);
float brick_width = stack_load_float_default(stack, brick_width_offset, node3.x);
float row_height = stack_load_float_default(stack, row_height_offset, node3.y);
float offset_amount = __int_as_float(node3.z);
float squash_amount = __int_as_float(node3.w);
- float2 f2 = svm_brick(co*scale, mortar_size, bias, brick_width, row_height,
+ float2 f2 = svm_brick(co*scale, mortar_size, mortar_smooth, bias, brick_width, row_height,
offset_amount, offset_frequency, squash_amount, squash_frequency);
float tint = f2.x;
@@ -100,7 +112,7 @@ ccl_device void svm_node_tex_brick(KernelGlobals *kg, ShaderData *sd, float *sta
}
if(stack_valid(color_offset))
- stack_store_float3(stack, color_offset, (f == 1.0f)? mortar: color1);
+ stack_store_float3(stack, color_offset, color1*(1.0f-f) + mortar*f);
if(stack_valid(fac_offset))
stack_store_float(stack, fac_offset, f);
}
diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h
index 378ce65..2afdf61 100644
--- a/intern/cycles/kernel/svm/svm_image.h
+++ b/intern/cycles/kernel/svm/svm_image.h
@@ -29,147 +29,6 @@ CCL_NAMESPACE_BEGIN
# define TEX_NUM_FLOAT4_IMAGES TEX_NUM_FLOAT4_OPENCL
#endif
-#ifdef __KERNEL_OPENCL__
-
-/* For OpenCL all images are packed in a single array, and we do manual lookup
- * and interpolation. */
-
-ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int id, int offset)
-{
- /* Float4 */
- if(id < TEX_START_BYTE4_OPENCL) {
- return kernel_tex_fetch(__tex_image_float4_packed, offset);
- }
- /* Byte4 */
- else if(id < TEX_START_FLOAT_OPENCL) {
- uchar4 r = kernel_tex_fetch(__tex_image_byte4_packed, offset);
- float f = 1.0f/255.0f;
- return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
- }
- /* Float */
- else if(id < TEX_START_BYTE_OPENCL) {
- float f = kernel_tex_fetch(__tex_image_float_packed, offset);
- return make_float4(f, f, f, 1.0f);
- }
- /* Byte */
- else {
- uchar r = kernel_tex_fetch(__tex_image_byte_packed, offset);
- float f = r * (1.0f/255.0f);
- return make_float4(f, f, f, 1.0f);
- }
-}
-
-ccl_device_inline int svm_image_texture_wrap_periodic(int x, int width)
-{
- x %= width;
- if(x < 0)
- x += width;
- return x;
-}
-
-ccl_device_inline int svm_image_texture_wrap_clamp(int x, int width)
-{
- return clamp(x, 0, width-1);
-}
-
-ccl_device_inline float svm_image_texture_frac(float x, int *ix)
-{
- int i = float_to_int(x) - ((x < 0.0f)? 1: 0);
- *ix = i;
- return x - (float)i;
-}
-
-ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, uint srgb, uint use_alpha)
-{
- uint4 info = kernel_tex_fetch(__tex_image_packed_info, id);
- uint width = info.x;
- uint height = info.y;
- uint offset = info.z;
-
- /* Image Options */
- uint interpolation = (info.w & (1 << 0)) ? INTERPOLATION_CLOSEST : INTERPOLATION_LINEAR;
- uint extension;
- if(info.w & (1 << 1))
- extension = EXTENSION_REPEAT;
- else if(info.w & (1 << 2))
- extension = EXTENSION_EXTEND;
- else
- extension = EXTENSION_CLIP;
-
- float4 r;
- int ix, iy, nix, niy;
- if(interpolation == INTERPOLATION_CLOSEST) {
- svm_image_texture_frac(x*width, &ix);
- svm_image_texture_frac(y*height, &iy);
-
- if(extension == EXTENSION_REPEAT) {
- ix = svm_image_texture_wrap_periodic(ix, width);
- iy = svm_image_texture_wrap_periodic(iy, height);
- }
- else if(extension == EXTENSION_CLIP) {
- if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f)
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- else { /* EXTENSION_EXTEND */
- ix = svm_image_texture_wrap_clamp(ix, width);
- iy = svm_image_texture_wrap_clamp(iy, height);
- }
-
- r = svm_image_texture_read(kg, id, offset + ix + iy*width);
- }
- else { /* INTERPOLATION_LINEAR */
- float tx = svm_image_texture_frac(x*width - 0.5f, &ix);
- float ty = svm_image_texture_frac(y*height - 0.5f, &iy);
-
- if(extension == EXTENSION_REPEAT) {
- ix = svm_image_texture_wrap_periodic(ix, width);
- iy = svm_image_texture_wrap_periodic(iy, height);
-
- nix = svm_image_texture_wrap_periodic(ix+1, width);
- niy = svm_image_texture_wrap_periodic(iy+1, height);
- }
- else {
- if(extension == EXTENSION_CLIP) {
- if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- }
- nix = svm_image_texture_wrap_clamp(ix+1, width);
- niy = svm_image_texture_wrap_clamp(iy+1, height);
- ix = svm_image_texture_wrap_clamp(ix, width);
- iy = svm_image_texture_wrap_clamp(iy, height);
- }
-
- r = (1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + iy*width);
- r += (1.0f - ty)*tx*svm_image_texture_read(kg, id, offset + nix + iy*width);
- r += ty*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + niy*width);
- r += ty*tx*svm_image_texture_read(kg, id, offset + nix + niy*width);
- }
-
- if(use_alpha && r.w != 1.0f && r.w != 0.0f) {
- float invw = 1.0f/r.w;
- r.x *= invw;
- r.y *= invw;
- r.z *= invw;
-
- if(id >= TEX_NUM_FLOAT4_IMAGES) {
- r.x = min(r.x, 1.0f);
- r.y = min(r.y, 1.0f);
- r.z = min(r.z, 1.0f);
- }
- }
-
- if(srgb) {
- r.x = color_srgb_to_scene_linear(r.x);
- r.y = color_srgb_to_scene_linear(r.y);
- r.z = color_srgb_to_scene_linear(r.z);
- }
-
- return r;
-}
-
-#else
-
ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, uint srgb, uint use_alpha)
{
#ifdef __KERNEL_CPU__
@@ -180,6 +39,8 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
# else
float4 r = kernel_tex_image_interp(id, x, y);
# endif
+#elif defined(__KERNEL_OPENCL__)
+ float4 r = kernel_tex_image_interp(kg, id, x, y);
#else
float4 r;
@@ -339,8 +200,6 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
return r;
}
-#endif
-
/* Remap coordinate from 0..1 box to -1..1 */
ccl_device_inline float3 texco_remap_square(float3 co)
{
@@ -382,8 +241,7 @@ ccl_device void svm_node_tex_image_box(KernelGlobals *kg, ShaderData *sd, float
float3 N = ccl_fetch(sd, N);
N = ccl_fetch(sd, N);
- if(ccl_fetch(sd, object) != OBJECT_NONE)
- object_inverse_normal_transform(kg, sd, &N);
+ object_inverse_normal_transform(kg, sd, &N);
/* project from direction vector to barycentric coordinates in triangles */
N.x = fabsf(N.x);
diff --git a/intern/cycles/kernel/svm/svm_light_path.h b/intern/cycles/kernel/svm/svm_light_path.h
index f35ea05..04f6f62 100644
--- a/intern/cycles/kernel/svm/svm_light_path.h
+++ b/intern/cycles/kernel/svm/svm_light_path.h
@@ -34,6 +34,8 @@ ccl_device void svm_node_light_path(ShaderData *sd, ccl_addr_space PathState *st
case NODE_LP_backfacing: info = (ccl_fetch(sd, flag) & SD_BACKFACING)? 1.0f: 0.0f; break;
case NODE_LP_ray_length: info = ccl_fetch(sd, ray_length); break;
case NODE_LP_ray_depth: info = (float)state->bounce; break;
+ case NODE_LP_ray_diffuse: info = (float)state->diffuse_bounce; break;
+ case NODE_LP_ray_glossy: info = (float)state->glossy_bounce; break;
case NODE_LP_ray_transparent: info = (float)state->transparent_bounce; break;
case NODE_LP_ray_transmission: info = (float)state->transmission_bounce; break;
}
diff --git a/intern/cycles/kernel/svm/svm_math_util.h b/intern/cycles/kernel/svm/svm_math_util.h
index 6d13a0d..01547b6 100644
--- a/intern/cycles/kernel/svm/svm_math_util.h
+++ b/intern/cycles/kernel/svm/svm_math_util.h
@@ -164,6 +164,9 @@ ccl_device float3 svm_math_blackbody_color(float t) {
ccl_device_inline float3 svm_math_gamma_color(float3 color, float gamma)
{
+ if(gamma == 0.0f)
+ return make_float3(1.0f, 1.0f, 1.0f);
+
if(color.x > 0.0f)
color.x = powf(color.x, gamma);
if(color.y > 0.0f)
diff --git a/intern/cycles/kernel/svm/svm_tex_coord.h b/intern/cycles/kernel/svm/svm_tex_coord.h
index 6ea2539..c0b0126 100644
--- a/intern/cycles/kernel/svm/svm_tex_coord.h
+++ b/intern/cycles/kernel/svm/svm_tex_coord.h
@@ -49,8 +49,7 @@ ccl_device void svm_node_tex_coord(KernelGlobals *kg,
}
case NODE_TEXCO_NORMAL: {
data = ccl_fetch(sd, N);
- if(ccl_fetch(sd, object) != OBJECT_NONE)
- object_inverse_normal_transform(kg, sd, &data);
+ object_inverse_normal_transform(kg, sd, &data);
break;
}
case NODE_TEXCO_CAMERA: {
@@ -131,8 +130,7 @@ ccl_device void svm_node_tex_coord_bump_dx(KernelGlobals *kg,
}
case NODE_TEXCO_NORMAL: {
data = ccl_fetch(sd, N);
- if(ccl_fetch(sd, object) != OBJECT_NONE)
- object_inverse_normal_transform(kg, sd, &data);
+ object_inverse_normal_transform(kg, sd, &data);
break;
}
case NODE_TEXCO_CAMERA: {
@@ -216,8 +214,7 @@ ccl_device void svm_node_tex_coord_bump_dy(KernelGlobals *kg,
}
case NODE_TEXCO_NORMAL: {
data = ccl_fetch(sd, N);
- if(ccl_fetch(sd, object) != OBJECT_NONE)
- object_inverse_normal_transform(kg, sd, &data);
+ object_inverse_normal_transform(kg, sd, &data);
break;
}
case NODE_TEXCO_CAMERA: {
diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h
index 5adf7d3..47209dd 100644
--- a/intern/cycles/kernel/svm/svm_types.h
+++ b/intern/cycles/kernel/svm/svm_types.h
@@ -188,6 +188,8 @@ typedef enum NodeLightPath {
NODE_LP_backfacing,
NODE_LP_ray_length,
NODE_LP_ray_depth,
+ NODE_LP_ray_diffuse,
+ NODE_LP_ray_glossy,
NODE_LP_ray_transparent,
NODE_LP_ray_transmission,
} NodeLightPath;
diff --git a/intern/cycles/kernel/svm/svm_voxel.h b/intern/cycles/kernel/svm/svm_voxel.h
index f54f4e8..a8b3604 100644
--- a/intern/cycles/kernel/svm/svm_voxel.h
+++ b/intern/cycles/kernel/svm/svm_voxel.h
@@ -43,7 +43,7 @@ ccl_device void svm_node_tex_voxel(KernelGlobals *kg,
co = transform_point(&tfm, co);
}
float4 r;
-# if defined(__KERNEL_GPU__)
+# if defined(__KERNEL_CUDA__)
# if __CUDA_ARCH__ >= 300
CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id);
if(id < 2048) /* TODO(dingto): Make this a variable */
@@ -55,9 +55,11 @@ ccl_device void svm_node_tex_voxel(KernelGlobals *kg,
# else /* __CUDA_ARCH__ >= 300 */
r = volume_image_texture_3d(id, co.x, co.y, co.z);
# endif
-# else /* __KERNEL_GPU__ */
+# elif defined(__KERNEL_OPENCL__)
+ r = kernel_tex_image_interp_3d(kg, id, co.x, co.y, co.z);
+# else
r = kernel_tex_image_interp_3d(id, co.x, co.y, co.z);
-# endif
+# endif /* __KERNEL_CUDA__ */
#else
float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
#endif
diff --git a/intern/cycles/render/bake.cpp b/intern/cycles/render/bake.cpp
index 13310a6..d9a2970 100644
--- a/intern/cycles/render/bake.cpp
+++ b/intern/cycles/render/bake.cpp
@@ -135,20 +135,16 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre
{
size_t num_pixels = bake_data->size();
- progress.reset_sample();
- this->num_parts = 0;
+ int num_samples = is_aa_pass(shader_type)? scene->integrator->aa_samples : 1;
- /* calculate the total parts for the progress bar */
+ /* calculate the total pixel samples for the progress bar */
+ total_pixel_samples = 0;
for(size_t shader_offset = 0; shader_offset < num_pixels; shader_offset += m_shader_limit) {
size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit);
-
- DeviceTask task(DeviceTask::SHADER);
- task.shader_w = shader_size;
-
- this->num_parts += device->get_split_task_count(task);
+ total_pixel_samples += shader_size * num_samples;
}
-
- this->num_samples = is_aa_pass(shader_type)? scene->integrator->aa_samples : 1;
+ progress.reset_sample();
+ progress.set_total_pixel_samples(total_pixel_samples);
for(size_t shader_offset = 0; shader_offset < num_pixels; shader_offset += m_shader_limit) {
size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit);
@@ -187,9 +183,9 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre
task.shader_x = 0;
task.offset = shader_offset;
task.shader_w = d_output.size();
- task.num_samples = this->num_samples;
+ task.num_samples = num_samples;
task.get_cancel = function_bind(&Progress::get_cancel, &progress);
- task.update_progress_sample = function_bind(&Progress::increment_sample_update, &progress);
+ task.update_progress_sample = function_bind(&Progress::add_samples_update, &progress, _1, _2);
device->task_add(task);
device->task_wait();
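Since the shader_size chunks partition the num_pixels bake points, the loop above effectively computes num_pixels * num_samples. Progress is then reported in pixel-sample units through add_samples_update() instead of counting split-task parts, which presumably gives a comparable progress readout for the full and split kernels.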
diff --git a/intern/cycles/render/bake.h b/intern/cycles/render/bake.h
index 8377e38..25f5eb3 100644
--- a/intern/cycles/render/bake.h
+++ b/intern/cycles/render/bake.h
@@ -73,8 +73,7 @@ public:
bool need_update;
- int num_samples;
- int num_parts;
+ int total_pixel_samples;
private:
BakeData *m_bake_data;
diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp
index 1e170d3..f169271 100644
--- a/intern/cycles/render/buffers.cpp
+++ b/intern/cycles/render/buffers.cpp
@@ -135,15 +135,7 @@ void RenderBuffers::reset(Device *device, BufferParams& params_)
/* allocate rng state */
rng_state.resize(params.width, params.height);
- uint *init_state = rng_state.resize(params.width, params.height);
- int x, y, width = params.width, height = params.height;
-
- for(y = 0; y < height; y++)
- for(x = 0; x < width; x++)
- init_state[y*width + x] = hash_int_2d(params.full_x+x, params.full_y+y);
-
device->mem_alloc(rng_state, MEM_READ_WRITE);
- device->mem_copy_to(rng_state);
}
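With the host-side hash_int_2d() fill removed, the RNG state buffer is only allocated here; the per-pixel seeding has presumably moved onto the device (the new start_sample field added to KernelIntegrator earlier in the patch points the same way).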
bool RenderBuffers::copy_from_device()
@@ -193,13 +185,11 @@ bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int
}
}
#ifdef WITH_CYCLES_DEBUG
- else if(type == PASS_BVH_TRAVERSAL_STEPS) {
- for(int i = 0; i < size; i++, in += pass_stride, pixels++) {
- float f = *in;
- pixels[0] = f*scale;
- }
- }
- else if(type == PASS_RAY_BOUNCES) {
+ else if(type == PASS_BVH_TRAVERSED_NODES ||
+ type == PASS_BVH_TRAVERSED_INSTANCES ||
+ type == PASS_BVH_INTERSECTIONS ||
+ type == PASS_RAY_BOUNCES)
+ {
for(int i = 0; i < size; i++, in += pass_stride, pixels++) {
float f = *in;
pixels[0] = f*scale;
diff --git a/intern/cycles/render/constant_fold.cpp b/intern/cycles/render/constant_fold.cpp
index 200a4c4..b7f2566 100644
--- a/intern/cycles/render/constant_fold.cpp
+++ b/intern/cycles/render/constant_fold.cpp
@@ -89,6 +89,19 @@ void ConstantFolder::make_zero() const
}
}
+void ConstantFolder::make_one() const
+{
+ if(output->type() == SocketType::FLOAT) {
+ make_constant(1.0f);
+ }
+ else if(SocketType::is_float3(output->type())) {
+ make_constant(make_float3(1.0f, 1.0f, 1.0f));
+ }
+ else {
+ assert(0);
+ }
+}
+
void ConstantFolder::bypass(ShaderOutput *new_output) const
{
assert(new_output);
@@ -321,6 +334,15 @@ void ConstantFolder::fold_math(NodeMath type, bool clamp) const
make_zero();
}
break;
+ case NODE_MATH_POWER:
+ /* 1 ^ X == X ^ 0 == 1 */
+ if(is_one(value1_in) || is_zero(value2_in)) {
+ make_one();
+ }
+ /* X ^ 1 == X */
+ else if(is_one(value2_in)) {
+ try_bypass_or_make_constant(value1_in, clamp);
+ }
default:
break;
}
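
The new NODE_MATH_POWER case applies the usual algebraic identities; restated outside the compiler (FoldResult and fold_power are made-up names for illustration only):

enum FoldResult { FOLD_NONE, FOLD_ONE, FOLD_PASS_BASE };

FoldResult fold_power(bool base_is_one, bool exp_is_zero, bool exp_is_one)
{
	if(base_is_one || exp_is_zero)
		return FOLD_ONE;        /* 1 ^ X == X ^ 0 == 1 */
	if(exp_is_one)
		return FOLD_PASS_BASE;  /* X ^ 1 == X, keep the base input */
	return FOLD_NONE;
}
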
diff --git a/intern/cycles/render/constant_fold.h b/intern/cycles/render/constant_fold.h
index 2b31c2a..7962698 100644
--- a/intern/cycles/render/constant_fold.h
+++ b/intern/cycles/render/constant_fold.h
@@ -43,6 +43,7 @@ public:
void make_constant_clamp(float value, bool clamp) const;
void make_constant_clamp(float3 value, bool clamp) const;
void make_zero() const;
+ void make_one() const;
/* Bypass node, relinking to another output socket. */
void bypass(ShaderOutput *output) const;
diff --git a/intern/cycles/render/film.cpp b/intern/cycles/render/film.cpp
index e10a938..923252b 100644
--- a/intern/cycles/render/film.cpp
+++ b/intern/cycles/render/film.cpp
@@ -154,14 +154,9 @@ void Pass::add(PassType type, array<Pass>& passes)
pass.components = 0;
break;
#ifdef WITH_CYCLES_DEBUG
- case PASS_BVH_TRAVERSAL_STEPS:
- pass.components = 1;
- pass.exposure = false;
- break;
+ case PASS_BVH_TRAVERSED_NODES:
case PASS_BVH_TRAVERSED_INSTANCES:
- pass.components = 1;
- pass.exposure = false;
- break;
+ case PASS_BVH_INTERSECTIONS:
case PASS_RAY_BOUNCES:
pass.components = 1;
pass.exposure = false;
@@ -421,12 +416,15 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
break;
#ifdef WITH_CYCLES_DEBUG
- case PASS_BVH_TRAVERSAL_STEPS:
- kfilm->pass_bvh_traversal_steps = kfilm->pass_stride;
+ case PASS_BVH_TRAVERSED_NODES:
+ kfilm->pass_bvh_traversed_nodes = kfilm->pass_stride;
break;
case PASS_BVH_TRAVERSED_INSTANCES:
kfilm->pass_bvh_traversed_instances = kfilm->pass_stride;
break;
+ case PASS_BVH_INTERSECTIONS:
+ kfilm->pass_bvh_intersections = kfilm->pass_stride;
+ break;
case PASS_RAY_BOUNCES:
kfilm->pass_ray_bounces = kfilm->pass_stride;
break;
diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp
index 83d69e9..ab830b1 100644
--- a/intern/cycles/render/image.cpp
+++ b/intern/cycles/render/image.cpp
@@ -19,6 +19,7 @@
#include "scene.h"
#include "util_foreach.h"
+#include "util_logging.h"
#include "util_path.h"
#include "util_progress.h"
#include "util_texture.h"
@@ -280,6 +281,8 @@ int ImageManager::add_image(const string& filename,
ImageDataType type = get_image_metadata(filename, builtin_data, is_linear);
+ thread_scoped_lock device_lock(device_mutex);
+
/* Do we have a float? */
if(type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4)
is_float = true;
@@ -469,133 +472,51 @@ bool ImageManager::file_load_image_generic(Image *img, ImageInput **in, int &wid
return true;
}
-template<typename T>
-bool ImageManager::file_load_byte_image(Image *img, ImageDataType type, device_vector<T>& tex_img)
+template<TypeDesc::BASETYPE FileFormat,
+ typename StorageType,
+ typename DeviceType>
+bool ImageManager::file_load_image(Image *img,
+ ImageDataType type,
+ int texture_limit,
+ device_vector<DeviceType>& tex_img)
{
+ const StorageType alpha_one = (FileFormat == TypeDesc::UINT8)? 255 : 1;
ImageInput *in = NULL;
int width, height, depth, components;
-
- if(!file_load_image_generic(img, &in, width, height, depth, components))
- return false;
-
- /* read RGBA pixels */
- uchar *pixels = (uchar*)tex_img.resize(width, height, depth);
- if(pixels == NULL) {
+ if(!file_load_image_generic(img, &in, width, height, depth, components)) {
return false;
}
- bool cmyk = false;
-
- if(in) {
- if(depth <= 1) {
- int scanlinesize = width*components*sizeof(uchar);
-
- in->read_image(TypeDesc::UINT8,
- (uchar*)pixels + (((size_t)height)-1)*scanlinesize,
- AutoStride,
- -scanlinesize,
- AutoStride);
- }
- else {
- in->read_image(TypeDesc::UINT8, (uchar*)pixels);
- }
-
- cmyk = strcmp(in->format_name(), "jpeg") == 0 && components == 4;
-
- in->close();
- delete in;
+ /* Read RGBA pixels. */
+ vector<StorageType> pixels_storage;
+ StorageType *pixels;
+ const size_t max_size = max(max(width, height), depth);
+ if(texture_limit > 0 && max_size > texture_limit) {
+ pixels_storage.resize(((size_t)width)*height*depth*4);
+ pixels = &pixels_storage[0];
}
else {
- builtin_image_pixels_cb(img->filename, img->builtin_data, pixels);
- }
-
- /* Check if we actually have a byte4 slot, in case components == 1, but device
- * doesn't support single channel textures. */
- if(type == IMAGE_DATA_TYPE_BYTE4) {
- size_t num_pixels = ((size_t)width) * height * depth;
- if(cmyk) {
- /* CMYK */
- for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
- pixels[i*4+2] = (pixels[i*4+2]*pixels[i*4+3])/255;
- pixels[i*4+1] = (pixels[i*4+1]*pixels[i*4+3])/255;
- pixels[i*4+0] = (pixels[i*4+0]*pixels[i*4+3])/255;
- pixels[i*4+3] = 255;
- }
- }
- else if(components == 2) {
- /* grayscale + alpha */
- for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
- pixels[i*4+3] = pixels[i*2+1];
- pixels[i*4+2] = pixels[i*2+0];
- pixels[i*4+1] = pixels[i*2+0];
- pixels[i*4+0] = pixels[i*2+0];
- }
- }
- else if(components == 3) {
- /* RGB */
- for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
- pixels[i*4+3] = 255;
- pixels[i*4+2] = pixels[i*3+2];
- pixels[i*4+1] = pixels[i*3+1];
- pixels[i*4+0] = pixels[i*3+0];
- }
- }
- else if(components == 1) {
- /* grayscale */
- for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
- pixels[i*4+3] = 255;
- pixels[i*4+2] = pixels[i];
- pixels[i*4+1] = pixels[i];
- pixels[i*4+0] = pixels[i];
- }
- }
-
- if(img->use_alpha == false) {
- for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
- pixels[i*4+3] = 255;
- }
- }
- }
-
- return true;
-}
-
-template<typename T>
-bool ImageManager::file_load_float_image(Image *img, ImageDataType type, device_vector<T>& tex_img)
-{
- ImageInput *in = NULL;
- int width, height, depth, components;
-
- if(!file_load_image_generic(img, &in, width, height, depth, components))
- return false;
-
- /* read RGBA pixels */
- float *pixels = (float*)tex_img.resize(width, height, depth);
- if(pixels == NULL) {
- return false;
+ pixels = (StorageType*)tex_img.resize(width, height, depth);
}
bool cmyk = false;
-
+ const size_t num_pixels = ((size_t)width) * height * depth;
if(in) {
- float *readpixels = pixels;
- vector<float> tmppixels;
-
+ StorageType *readpixels = pixels;
+ vector<StorageType> tmppixels;
if(components > 4) {
tmppixels.resize(((size_t)width)*height*components);
readpixels = &tmppixels[0];
}
-
if(depth <= 1) {
- size_t scanlinesize = ((size_t)width)*components*sizeof(float);
- in->read_image(TypeDesc::FLOAT,
+ size_t scanlinesize = ((size_t)width)*components*sizeof(StorageType);
+ in->read_image(FileFormat,
(uchar*)readpixels + (height-1)*scanlinesize,
AutoStride,
-scanlinesize,
AutoStride);
}
else {
- in->read_image(TypeDesc::FLOAT, (uchar*)readpixels);
+ in->read_image(FileFormat, (uchar*)readpixels);
}
-
if(components > 4) {
size_t dimensions = ((size_t)width)*height;
for(size_t i = dimensions-1, pixel = 0; pixel < dimensions; pixel++, i--) {
@@ -604,30 +525,43 @@ bool ImageManager::file_load_float_image(Image *img, ImageDataType type, device_
pixels[i*4+1] = tmppixels[i*components+1];
pixels[i*4+0] = tmppixels[i*components+0];
}
-
tmppixels.clear();
}
-
cmyk = strcmp(in->format_name(), "jpeg") == 0 && components == 4;
-
in->close();
delete in;
}
else {
- builtin_image_float_pixels_cb(img->filename, img->builtin_data, pixels);
+ if(FileFormat == TypeDesc::FLOAT) {
+ builtin_image_float_pixels_cb(img->filename,
+ img->builtin_data,
+ (float*)&pixels[0],
+ num_pixels * components);
+ }
+ else if(FileFormat == TypeDesc::UINT8) {
+ builtin_image_pixels_cb(img->filename,
+ img->builtin_data,
+ (uchar*)&pixels[0],
+ num_pixels * components);
+ }
+ else {
+ /* TODO(dingto): Support half for ImBuf. */
+ }
}
-
- /* Check if we actually have a float4 slot, in case components == 1, but device
- * doesn't support single channel textures. */
- if(type == IMAGE_DATA_TYPE_FLOAT4) {
- size_t num_pixels = ((size_t)width) * height * depth;
+ /* Check if we actually have a float4 slot, in case components == 1,
+ * but device doesn't support single channel textures.
+ */
+ bool is_rgba = (type == IMAGE_DATA_TYPE_FLOAT4 ||
+ type == IMAGE_DATA_TYPE_HALF4 ||
+ type == IMAGE_DATA_TYPE_BYTE4);
+ if(is_rgba) {
if(cmyk) {
/* CMYK */
for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
- pixels[i*4+3] = 255;
pixels[i*4+2] = (pixels[i*4+2]*pixels[i*4+3])/255;
pixels[i*4+1] = (pixels[i*4+1]*pixels[i*4+3])/255;
pixels[i*4+0] = (pixels[i*4+0]*pixels[i*4+3])/255;
+ pixels[i*4+3] = alpha_one;
}
}
else if(components == 2) {
@@ -642,7 +576,7 @@ bool ImageManager::file_load_float_image(Image *img, ImageDataType type, device_
else if(components == 3) {
/* RGB */
for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
- pixels[i*4+3] = 1.0f;
+ pixels[i*4+3] = alpha_one;
pixels[i*4+2] = pixels[i*3+2];
pixels[i*4+1] = pixels[i*3+1];
pixels[i*4+0] = pixels[i*3+0];
@@ -651,128 +585,53 @@ bool ImageManager::file_load_float_image(Image *img, ImageDataType type, device_
else if(components == 1) {
/* grayscale */
for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
- pixels[i*4+3] = 1.0f;
+ pixels[i*4+3] = alpha_one;
pixels[i*4+2] = pixels[i];
pixels[i*4+1] = pixels[i];
pixels[i*4+0] = pixels[i];
}
}
-
if(img->use_alpha == false) {
for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
- pixels[i*4+3] = 1.0f;
+ pixels[i*4+3] = alpha_one;
}
}
}
-
- return true;
-}
-
-template<typename T>
-bool ImageManager::file_load_half_image(Image *img, ImageDataType type, device_vector<T>& tex_img)
-{
- ImageInput *in = NULL;
- int width, height, depth, components;
-
- if(!file_load_image_generic(img, &in, width, height, depth, components))
- return false;
-
- /* read RGBA pixels */
- half *pixels = (half*)tex_img.resize(width, height, depth);
- if(pixels == NULL) {
- return false;
- }
-
- if(in) {
- half *readpixels = pixels;
- vector<half> tmppixels;
-
- if(components > 4) {
- tmppixels.resize(((size_t)width)*height*components);
- readpixels = &tmppixels[0];
- }
-
- if(depth <= 1) {
- size_t scanlinesize = ((size_t)width)*components*sizeof(half);
- in->read_image(TypeDesc::HALF,
- (uchar*)readpixels + (height-1)*scanlinesize,
- AutoStride,
- -scanlinesize,
- AutoStride);
- }
- else {
- in->read_image(TypeDesc::HALF, (uchar*)readpixels);
- }
-
- if(components > 4) {
- size_t dimensions = ((size_t)width)*height;
- for(size_t i = dimensions-1, pixel = 0; pixel < dimensions; pixel++, i--) {
- pixels[i*4+3] = tmppixels[i*components+3];
- pixels[i*4+2] = tmppixels[i*components+2];
- pixels[i*4+1] = tmppixels[i*components+1];
- pixels[i*4+0] = tmppixels[i*components+0];
- }
-
- tmppixels.clear();
- }
-
- in->close();
- delete in;
+ if(pixels_storage.size() > 0) {
+ float scale_factor = 1.0f;
+ while(max_size * scale_factor > texture_limit) {
+ scale_factor *= 0.5f;
+ }
+ VLOG(1) << "Scaling image " << img->filename
+ << " by a factor of " << scale_factor << ".";
+ vector<StorageType> scaled_pixels;
+ size_t scaled_width, scaled_height, scaled_depth;
+ util_image_resize_pixels(pixels_storage,
+ width, height, depth,
+ is_rgba ? 4 : 1,
+ scale_factor,
+ &scaled_pixels,
+ &scaled_width, &scaled_height, &scaled_depth);
+ StorageType *texture_pixels = (StorageType*)tex_img.resize(scaled_width,
+ scaled_height,
+ scaled_depth);
+ memcpy(texture_pixels,
+ &scaled_pixels[0],
+ scaled_pixels.size() * sizeof(StorageType));
}
-#if 0
- /* TODO(dingto): Support half for ImBuf. */
- else {
- builtin_image_float_pixels_cb(img->filename, img->builtin_data, pixels);
- }
-#endif
-
- /* Check if we actually have a half4 slot, in case components == 1, but device
- * doesn't support single channel textures. */
- if(type == IMAGE_DATA_TYPE_HALF4) {
- size_t num_pixels = ((size_t)width) * height * depth;
- if(components == 2) {
- /* grayscale + alpha */
- for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
- pixels[i*4+3] = pixels[i*2+1];
- pixels[i*4+2] = pixels[i*2+0];
- pixels[i*4+1] = pixels[i*2+0];
- pixels[i*4+0] = pixels[i*2+0];
- }
- }
- else if(components == 3) {
- /* RGB */
- for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
- pixels[i*4+3] = 1.0f;
- pixels[i*4+2] = pixels[i*3+2];
- pixels[i*4+1] = pixels[i*3+1];
- pixels[i*4+0] = pixels[i*3+0];
- }
- }
- else if(components == 1) {
- /* grayscale */
- for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
- pixels[i*4+3] = 1.0f;
- pixels[i*4+2] = pixels[i];
- pixels[i*4+1] = pixels[i];
- pixels[i*4+0] = pixels[i];
- }
- }
-
- if(img->use_alpha == false) {
- for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
- pixels[i*4+3] = 1.0f;
- }
- }
- }
-
return true;
}
-void ImageManager::device_load_image(Device *device, DeviceScene *dscene, ImageDataType type, int slot, Progress *progress)
+void ImageManager::device_load_image(Device *device,
+ DeviceScene *dscene,
+ Scene *scene,
+ ImageDataType type,
+ int slot,
+ Progress *progress)
{
if(progress->get_cancel())
return;
-
+
Image *img = images[type][slot];
if(osl_texture_system && !img->builtin_data)
@@ -781,6 +640,8 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, ImageD
string filename = path_filename(images[type][slot]->filename);
progress->set_status("Updating Images", "Loading " + filename);
+ const int texture_limit = scene->params.texture_limit;
+
/* Slot assignment */
int flat_slot = type_index_to_flattened_slot(slot, type);
@@ -800,7 +661,11 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, ImageD
device->tex_free(tex_img);
}
- if(!file_load_float_image(img, type, tex_img)) {
+ if(!file_load_image<TypeDesc::FLOAT, float>(img,
+ type,
+ texture_limit,
+ tex_img))
+ {
/* on failure to load, we set a 1x1 pixels pink image */
float *pixels = (float*)tex_img.resize(1, 1);
@@ -826,7 +691,11 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, ImageD
device->tex_free(tex_img);
}
- if(!file_load_float_image(img, type, tex_img)) {
+ if(!file_load_image<TypeDesc::FLOAT, float>(img,
+ type,
+ texture_limit,
+ tex_img))
+ {
/* on failure to load, we set a 1x1 pixels pink image */
float *pixels = (float*)tex_img.resize(1, 1);
@@ -849,7 +718,11 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, ImageD
device->tex_free(tex_img);
}
- if(!file_load_byte_image(img, type, tex_img)) {
+ if(!file_load_image<TypeDesc::UINT8, uchar>(img,
+ type,
+ texture_limit,
+ tex_img))
+ {
/* on failure to load, we set a 1x1 pixels pink image */
uchar *pixels = (uchar*)tex_img.resize(1, 1);
@@ -875,7 +748,10 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, ImageD
device->tex_free(tex_img);
}
- if(!file_load_byte_image(img, type, tex_img)) {
+ if(!file_load_image<TypeDesc::UINT8, uchar>(img,
+ type,
+ texture_limit,
+ tex_img)) {
/* on failure to load, we set a 1x1 pixels pink image */
uchar *pixels = (uchar*)tex_img.resize(1, 1);
@@ -898,7 +774,10 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, ImageD
device->tex_free(tex_img);
}
- if(!file_load_half_image(img, type, tex_img)) {
+ if(!file_load_image<TypeDesc::HALF, half>(img,
+ type,
+ texture_limit,
+ tex_img)) {
/* on failure to load, we set a 1x1 pixels pink image */
half *pixels = (half*)tex_img.resize(1, 1);
@@ -924,7 +803,10 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, ImageD
device->tex_free(tex_img);
}
- if(!file_load_half_image(img, type, tex_img)) {
+ if(!file_load_image<TypeDesc::HALF, half>(img,
+ type,
+ texture_limit,
+ tex_img)) {
/* on failure to load, we set a 1x1 pixels pink image */
half *pixels = (half*)tex_img.resize(1, 1);
@@ -1020,7 +902,10 @@ void ImageManager::device_free_image(Device *device, DeviceScene *dscene, ImageD
}
}
-void ImageManager::device_update(Device *device, DeviceScene *dscene, Progress& progress)
+void ImageManager::device_update(Device *device,
+ DeviceScene *dscene,
+ Scene *scene,
+ Progress& progress)
{
if(!need_update)
return;
@@ -1037,7 +922,14 @@ void ImageManager::device_update(Device *device, DeviceScene *dscene, Progress&
}
else if(images[type][slot]->need_load) {
if(!osl_texture_system || images[type][slot]->builtin_data)
- pool.push(function_bind(&ImageManager::device_load_image, this, device, dscene, (ImageDataType)type, slot, &progress));
+ pool.push(function_bind(&ImageManager::device_load_image,
+ this,
+ device,
+ dscene,
+ scene,
+ (ImageDataType)type,
+ slot,
+ &progress));
}
}
}
@@ -1052,6 +944,7 @@ void ImageManager::device_update(Device *device, DeviceScene *dscene, Progress&
void ImageManager::device_update_slot(Device *device,
DeviceScene *dscene,
+ Scene *scene,
int flat_slot,
Progress *progress)
{
@@ -1068,6 +961,7 @@ void ImageManager::device_update_slot(Device *device,
if(!osl_texture_system || image->builtin_data)
device_load_image(device,
dscene,
+ scene,
type,
slot,
progress);
@@ -1105,7 +999,7 @@ void ImageManager::device_pack_images(Device *device,
int info_size = tex_num_images[IMAGE_DATA_TYPE_FLOAT4] + tex_num_images[IMAGE_DATA_TYPE_BYTE4]
+ tex_num_images[IMAGE_DATA_TYPE_FLOAT] + tex_num_images[IMAGE_DATA_TYPE_BYTE];
- uint4 *info = dscene->tex_image_packed_info.resize(info_size);
+ uint4 *info = dscene->tex_image_packed_info.resize(info_size*2);
/* Byte4 Textures*/
type = IMAGE_DATA_TYPE_BYTE4;
@@ -1128,7 +1022,9 @@ void ImageManager::device_pack_images(Device *device,
uint8_t options = pack_image_options(type, slot);
- info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
+ int index = type_index_to_flattened_slot(slot, type) * 2;
+ info[index] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
+ info[index+1] = make_uint4(tex_img.data_depth, 0, 0, 0);
memcpy(pixels_byte4+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
offset += tex_img.size();
@@ -1157,7 +1053,10 @@ void ImageManager::device_pack_images(Device *device,
/* todo: support 3D textures, only CPU for now */
uint8_t options = pack_image_options(type, slot);
- info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
+
+ int index = type_index_to_flattened_slot(slot, type) * 2;
+ info[index] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
+ info[index+1] = make_uint4(tex_img.data_depth, 0, 0, 0);
memcpy(pixels_float4+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
offset += tex_img.size();
@@ -1185,7 +1084,9 @@ void ImageManager::device_pack_images(Device *device,
uint8_t options = pack_image_options(type, slot);
- info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
+ int index = type_index_to_flattened_slot(slot, type) * 2;
+ info[index] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
+ info[index+1] = make_uint4(tex_img.data_depth, 0, 0, 0);
memcpy(pixels_byte+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
offset += tex_img.size();
@@ -1214,7 +1115,10 @@ void ImageManager::device_pack_images(Device *device,
/* todo: support 3D textures, only CPU for now */
uint8_t options = pack_image_options(type, slot);
- info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
+
+ int index = type_index_to_flattened_slot(slot, type) * 2;
+ info[index] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
+ info[index+1] = make_uint4(tex_img.data_depth, 0, 0, 0);
memcpy(pixels_float+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
offset += tex_img.size();
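
For the new texture_limit handling above, the downscale factor is simply halved until the largest image dimension fits under the limit, and the already-read pixels are then resized into the device texture. A standalone restatement of the factor search (scale_factor_for is a hypothetical helper, not part of the patch):

#include <cstddef>

float scale_factor_for(size_t max_size, size_t texture_limit)
{
	float scale_factor = 1.0f;
	while(max_size * scale_factor > texture_limit)
		scale_factor *= 0.5f;
	return scale_factor;
}

/* Example: max_size 4000 with texture_limit 1024 gives 0.25 (4000 -> 1000). */
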
diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h
index cca71a6..47bbd92 100644
--- a/intern/cycles/render/image.h
+++ b/intern/cycles/render/image.h
@@ -30,6 +30,7 @@ CCL_NAMESPACE_BEGIN
class Device;
class DeviceScene;
class Progress;
+class Scene;
class ImageManager {
public:
@@ -67,8 +68,15 @@ public:
ExtensionType extension);
ImageDataType get_image_metadata(const string& filename, void *builtin_data, bool& is_linear);
- void device_update(Device *device, DeviceScene *dscene, Progress& progress);
- void device_update_slot(Device *device, DeviceScene *dscene, int flat_slot, Progress *progress);
+ void device_update(Device *device,
+ DeviceScene *dscene,
+ Scene *scene,
+ Progress& progress);
+ void device_update_slot(Device *device,
+ DeviceScene *dscene,
+ Scene *scene,
+ int flat_slot,
+ Progress *progress);
void device_free(Device *device, DeviceScene *dscene);
void device_free_builtin(Device *device, DeviceScene *dscene);
@@ -78,9 +86,25 @@ public:
bool need_update;
- function<void(const string &filename, void *data, bool &is_float, int &width, int &height, int &depth, int &channels)> builtin_image_info_cb;
- function<bool(const string &filename, void *data, unsigned char *pixels)> builtin_image_pixels_cb;
- function<bool(const string &filename, void *data, float *pixels)> builtin_image_float_pixels_cb;
+ /* NOTE: Here pixels_size is the size of the storage, which equals
+ * width * height * depth.
+ * Use this to avoid some nasty memory corruptions.
+ */
+ function<void(const string &filename,
+ void *data,
+ bool &is_float,
+ int &width,
+ int &height,
+ int &depth,
+ int &channels)> builtin_image_info_cb;
+ function<bool(const string &filename,
+ void *data,
+ unsigned char *pixels,
+ const size_t pixels_size)> builtin_image_pixels_cb;
+ function<bool(const string &filename,
+ void *data,
+ float *pixels,
+ const size_t pixels_size)> builtin_image_float_pixels_cb;
struct Image {
string filename;
@@ -109,14 +133,13 @@ private:
bool file_load_image_generic(Image *img, ImageInput **in, int &width, int &height, int &depth, int &components);
- template<typename T>
- bool file_load_byte_image(Image *img, ImageDataType type, device_vector<T>& tex_img);
-
- template<typename T>
- bool file_load_float_image(Image *img, ImageDataType type, device_vector<T>& tex_img);
-
- template<typename T>
- bool file_load_half_image(Image *img, ImageDataType type, device_vector<T>& tex_img);
+ template<TypeDesc::BASETYPE FileFormat,
+ typename StorageType,
+ typename DeviceType>
+ bool file_load_image(Image *img,
+ ImageDataType type,
+ int texture_limit,
+ device_vector<DeviceType>& tex_img);
int type_index_to_flattened_slot(int slot, ImageDataType type);
int flattened_slot_to_type_index(int flat_slot, ImageDataType *type);
@@ -124,10 +147,20 @@ private:
uint8_t pack_image_options(ImageDataType type, size_t slot);
- void device_load_image(Device *device, DeviceScene *dscene, ImageDataType type, int slot, Progress *progess);
- void device_free_image(Device *device, DeviceScene *dscene, ImageDataType type, int slot);
-
- void device_pack_images(Device *device, DeviceScene *dscene, Progress& progess);
+ void device_load_image(Device *device,
+ DeviceScene *dscene,
+ Scene *scene,
+ ImageDataType type,
+ int slot,
+ Progress *progess);
+ void device_free_image(Device *device,
+ DeviceScene *dscene,
+ ImageDataType type,
+ int slot);
+
+ void device_pack_images(Device *device,
+ DeviceScene *dscene,
+ Progress& progess);
};
CCL_NAMESPACE_END
diff --git a/intern/cycles/render/integrator.cpp b/intern/cycles/render/integrator.cpp
index 63914e5..d434b33 100644
--- a/intern/cycles/render/integrator.cpp
+++ b/intern/cycles/render/integrator.cpp
@@ -62,9 +62,11 @@ NODE_DEFINE(Integrator)
SOCKET_INT(mesh_light_samples, "Mesh Light Samples", 1);
SOCKET_INT(subsurface_samples, "Subsurface Samples", 1);
SOCKET_INT(volume_samples, "Volume Samples", 1);
+ SOCKET_INT(start_sample, "Start Sample", 0);
SOCKET_BOOLEAN(sample_all_lights_direct, "Sample All Lights Direct", true);
SOCKET_BOOLEAN(sample_all_lights_indirect, "Sample All Lights Indirect", true);
+ SOCKET_FLOAT(light_sampling_threshold, "Light Sampling Threshold", 0.05f);
static NodeEnum method_enum;
method_enum.insert("path", PATH);
@@ -151,6 +153,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
kintegrator->mesh_light_samples = mesh_light_samples;
kintegrator->subsurface_samples = subsurface_samples;
kintegrator->volume_samples = volume_samples;
+ kintegrator->start_sample = start_sample;
if(method == BRANCHED_PATH) {
kintegrator->sample_all_lights_direct = sample_all_lights_direct;
@@ -164,6 +167,13 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
kintegrator->sampling_pattern = sampling_pattern;
kintegrator->aa_samples = aa_samples;
+ if(light_sampling_threshold > 0.0f) {
+ kintegrator->light_inv_rr_threshold = 1.0f / light_sampling_threshold;
+ }
+ else {
+ kintegrator->light_inv_rr_threshold = 0.0f;
+ }
+
/* sobol directions table */
int max_samples = 1;
diff --git a/intern/cycles/render/integrator.h b/intern/cycles/render/integrator.h
index 39eaaf2..e1e316d 100644
--- a/intern/cycles/render/integrator.h
+++ b/intern/cycles/render/integrator.h
@@ -64,8 +64,11 @@ public:
int mesh_light_samples;
int subsurface_samples;
int volume_samples;
+ int start_sample;
+
bool sample_all_lights_direct;
bool sample_all_lights_indirect;
+ float light_sampling_threshold;
enum Method {
BRANCHED_PATH = 0,
diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp
index b6c45dd..2245c86 100644
--- a/intern/cycles/render/light.cpp
+++ b/intern/cycles/render/light.cpp
@@ -43,8 +43,8 @@ static void shade_background_pixels(Device *device, DeviceScene *dscene, int res
for(int y = 0; y < height; y++) {
for(int x = 0; x < width; x++) {
- float u = x/(float)width;
- float v = y/(float)height;
+ float u = (x + 0.5f)/width;
+ float v = (y + 0.5f)/height;
uint4 in = make_uint4(__float_as_int(u), __float_as_int(v), 0, 0);
d_input_data[x + y*width] = in;
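
As a quick numeric check of the sampling change above: with width 4, u now takes the pixel-center values 0.125, 0.375, 0.625 and 0.875 instead of the edge values 0.0, 0.25, 0.5 and 0.75, and likewise for v, so the background importance map is evaluated at the middle of each pixel.
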
@@ -106,6 +106,7 @@ NODE_DEFINE(Light)
static NodeEnum type_enum;
type_enum.insert("point", LIGHT_POINT);
+ type_enum.insert("distant", LIGHT_DISTANT);
type_enum.insert("background", LIGHT_BACKGROUND);
type_enum.insert("area", LIGHT_AREA);
type_enum.insert("spot", LIGHT_SPOT);
@@ -126,6 +127,8 @@ NODE_DEFINE(Light)
SOCKET_FLOAT(spot_angle, "Spot Angle", M_PI_4_F);
SOCKET_FLOAT(spot_smooth, "Spot Smooth", 0.0f);
+ SOCKET_TRANSFORM(tfm, "Transform", transform_identity());
+
SOCKET_BOOLEAN(cast_shadow, "Cast Shadow", true);
SOCKET_BOOLEAN(use_mis, "Use Mis", false);
SOCKET_BOOLEAN(use_diffuse, "Use Diffuse", true);
@@ -674,7 +677,6 @@ void LightManager::device_update_points(Device *device,
light_data[light_index*LIGHT_SIZE + 1] = make_float4(__int_as_float(shader_id), radius, invarea, 0.0f);
light_data[light_index*LIGHT_SIZE + 2] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
light_data[light_index*LIGHT_SIZE + 3] = make_float4(samples, 0.0f, 0.0f, 0.0f);
- light_data[light_index*LIGHT_SIZE + 4] = make_float4(max_bounces, 0.0f, 0.0f, 0.0f);
}
else if(light->type == LIGHT_DISTANT) {
shader_id &= ~SHADER_AREA_LIGHT;
@@ -695,7 +697,6 @@ void LightManager::device_update_points(Device *device,
light_data[light_index*LIGHT_SIZE + 1] = make_float4(__int_as_float(shader_id), radius, cosangle, invarea);
light_data[light_index*LIGHT_SIZE + 2] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
light_data[light_index*LIGHT_SIZE + 3] = make_float4(samples, 0.0f, 0.0f, 0.0f);
- light_data[light_index*LIGHT_SIZE + 4] = make_float4(max_bounces, 0.0f, 0.0f, 0.0f);
}
else if(light->type == LIGHT_BACKGROUND) {
uint visibility = scene->background->visibility;
@@ -724,7 +725,6 @@ void LightManager::device_update_points(Device *device,
light_data[light_index*LIGHT_SIZE + 1] = make_float4(__int_as_float(shader_id), 0.0f, 0.0f, 0.0f);
light_data[light_index*LIGHT_SIZE + 2] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
light_data[light_index*LIGHT_SIZE + 3] = make_float4(samples, 0.0f, 0.0f, 0.0f);
- light_data[light_index*LIGHT_SIZE + 4] = make_float4(max_bounces, 0.0f, 0.0f, 0.0f);
}
else if(light->type == LIGHT_AREA) {
float3 axisu = light->axisu*(light->sizeu*light->size);
@@ -742,7 +742,6 @@ void LightManager::device_update_points(Device *device,
light_data[light_index*LIGHT_SIZE + 1] = make_float4(__int_as_float(shader_id), axisu.x, axisu.y, axisu.z);
light_data[light_index*LIGHT_SIZE + 2] = make_float4(invarea, axisv.x, axisv.y, axisv.z);
light_data[light_index*LIGHT_SIZE + 3] = make_float4(samples, dir.x, dir.y, dir.z);
- light_data[light_index*LIGHT_SIZE + 4] = make_float4(max_bounces, 0.0f, 0.0f, 0.0f);
}
else if(light->type == LIGHT_SPOT) {
shader_id &= ~SHADER_AREA_LIGHT;
@@ -762,9 +761,15 @@ void LightManager::device_update_points(Device *device,
light_data[light_index*LIGHT_SIZE + 1] = make_float4(__int_as_float(shader_id), radius, invarea, spot_angle);
light_data[light_index*LIGHT_SIZE + 2] = make_float4(spot_smooth, dir.x, dir.y, dir.z);
light_data[light_index*LIGHT_SIZE + 3] = make_float4(samples, 0.0f, 0.0f, 0.0f);
- light_data[light_index*LIGHT_SIZE + 4] = make_float4(max_bounces, 0.0f, 0.0f, 0.0f);
}
+ light_data[light_index*LIGHT_SIZE + 4] = make_float4(max_bounces, 0.0f, 0.0f, 0.0f);
+
+ Transform tfm = light->tfm;
+ Transform itfm = transform_inverse(tfm);
+ memcpy(&light_data[light_index*LIGHT_SIZE + 5], &tfm, sizeof(float4)*3);
+ memcpy(&light_data[light_index*LIGHT_SIZE + 8], &itfm, sizeof(float4)*3);
+
light_index++;
}
@@ -791,6 +796,11 @@ void LightManager::device_update_points(Device *device,
light_data[light_index*LIGHT_SIZE + 3] = make_float4(-1, dir.x, dir.y, dir.z);
light_data[light_index*LIGHT_SIZE + 4] = make_float4(-1, 0.0f, 0.0f, 0.0f);
+ Transform tfm = light->tfm;
+ Transform itfm = transform_inverse(tfm);
+ memcpy(&light_data[light_index*LIGHT_SIZE + 5], &tfm, sizeof(float4)*3);
+ memcpy(&light_data[light_index*LIGHT_SIZE + 8], &itfm, sizeof(float4)*3);
+
light_index++;
}
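
With the per-light transform now stored alongside the other parameters, each light record gains six more float4 rows. A purely illustrative slot map (the code indexes light_data[light_index*LIGHT_SIZE + slot] directly; this enum is not part of the patch):

enum LightDataSlot {
	LIGHT_SLOT_PARAM0      = 0,  /* slots 0-3: type-specific parameters (co, shader_id, samples, ...) */
	LIGHT_SLOT_MAX_BOUNCES = 4,  /* max_bounces in .x, now written once for every light type */
	LIGHT_SLOT_TFM         = 5,  /* slots 5-7: 3x4 light transform, copied with memcpy */
	LIGHT_SLOT_ITFM        = 8   /* slots 8-10: its inverse */
};                               /* implies LIGHT_SIZE of at least 11 in this version */
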
diff --git a/intern/cycles/render/light.h b/intern/cycles/render/light.h
index 040a672..f56530b 100644
--- a/intern/cycles/render/light.h
+++ b/intern/cycles/render/light.h
@@ -50,6 +50,8 @@ public:
float3 axisv;
float sizev;
+ Transform tfm;
+
int map_resolution;
float spot_angle;
diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp
index ac369a0..c42b329 100644
--- a/intern/cycles/render/mesh.cpp
+++ b/intern/cycles/render/mesh.cpp
@@ -49,6 +49,64 @@ void Mesh::Triangle::bounds_grow(const float3 *verts, BoundBox& bounds) const
bounds.grow(verts[v[2]]);
}
+void Mesh::Triangle::motion_verts(const float3 *verts,
+ const float3 *vert_steps,
+ size_t num_verts,
+ size_t num_steps,
+ float time,
+ float3 r_verts[3]) const
+{
+ /* Figure out which steps we need to fetch and their interpolation factor. */
+ const size_t max_step = num_steps - 1;
+ const size_t step = min((int)(time * max_step), max_step - 1);
+ const float t = time*max_step - step;
+ /* Fetch vertex coordinates. */
+ float3 curr_verts[3];
+ float3 next_verts[3];
+ verts_for_step(verts,
+ vert_steps,
+ num_verts,
+ num_steps,
+ step,
+ curr_verts);
+ verts_for_step(verts,
+ vert_steps,
+ num_verts,
+ num_steps,
+ step + 1,
+ next_verts);
+ /* Interpolate between steps. */
+ r_verts[0] = (1.0f - t)*curr_verts[0] + t*next_verts[0];
+ r_verts[1] = (1.0f - t)*curr_verts[1] + t*next_verts[1];
+ r_verts[2] = (1.0f - t)*curr_verts[2] + t*next_verts[2];
+}
+
+void Mesh::Triangle::verts_for_step(const float3 *verts,
+ const float3 *vert_steps,
+ size_t num_verts,
+ size_t num_steps,
+ size_t step,
+ float3 r_verts[3]) const
+{
+ const size_t center_step = ((num_steps - 1) / 2);
+ if(step == center_step) {
+ /* Center step: regular vertex location. */
+ r_verts[0] = verts[v[0]];
+ r_verts[1] = verts[v[1]];
+ r_verts[2] = verts[v[2]];
+ }
+ else {
+ /* Center step is not stored in the attribute array. */
+ if(step > center_step) {
+ step--;
+ }
+ size_t offset = step * num_verts;
+ r_verts[0] = vert_steps[offset + v[0]];
+ r_verts[1] = vert_steps[offset + v[1]];
+ r_verts[2] = vert_steps[offset + v[2]];
+ }
+}
+
/* Curve */
void Mesh::Curve::bounds_grow(const int k, const float3 *curve_keys, const float *curve_radius, BoundBox& bounds) const
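
The motion-blur helpers above (and the analogous Curve::motion_keys()/cardinal_motion_keys() added in the next hunk) map a time in [0, 1] to two adjacent steps plus a blend factor. A compact, self-contained numeric illustration of that mapping (the main() wrapper is illustrative only):

#include <algorithm>
#include <cstddef>
#include <cstdio>

int main()
{
	const std::size_t num_steps = 3;                 /* e.g. motion_steps == 3 */
	const float time = 0.75f;
	const std::size_t max_step = num_steps - 1;      /* 2 */
	const std::size_t step =
	        std::min((std::size_t)(time * max_step), max_step - 1);  /* 1 */
	const float t = time * max_step - step;          /* 0.5: halfway between steps 1 and 2 */
	/* Step 1 is the center step ((num_steps - 1) / 2), so it reads the regular
	 * vertex array; step 2 reads the motion attribute with the center step skipped. */
	printf("step %zu, t %.2f\n", step, t);
	return 0;
}
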
@@ -104,6 +162,205 @@ void Mesh::Curve::bounds_grow(const int k,
bounds.grow(upper, mr);
}
+void Mesh::Curve::bounds_grow(float4 keys[4], BoundBox& bounds) const
+{
+ float3 P[4] = {
+ float4_to_float3(keys[0]),
+ float4_to_float3(keys[1]),
+ float4_to_float3(keys[2]),
+ float4_to_float3(keys[3]),
+ };
+
+ float3 lower;
+ float3 upper;
+
+ curvebounds(&lower.x, &upper.x, P, 0);
+ curvebounds(&lower.y, &upper.y, P, 1);
+ curvebounds(&lower.z, &upper.z, P, 2);
+
+ float mr = max(keys[1].w, keys[2].w);
+
+ bounds.grow(lower, mr);
+ bounds.grow(upper, mr);
+}
+
+void Mesh::Curve::motion_keys(const float3 *curve_keys,
+ const float *curve_radius,
+ const float3 *key_steps,
+ size_t num_curve_keys,
+ size_t num_steps,
+ float time,
+ size_t k0, size_t k1,
+ float4 r_keys[2]) const
+{
+ /* Figure out which steps we need to fetch and their interpolation factor. */
+ const size_t max_step = num_steps - 1;
+ const size_t step = min((int)(time * max_step), max_step - 1);
+ const float t = time*max_step - step;
+ /* Fetch vertex coordinates. */
+ float4 curr_keys[2];
+ float4 next_keys[2];
+ keys_for_step(curve_keys,
+ curve_radius,
+ key_steps,
+ num_curve_keys,
+ num_steps,
+ step,
+ k0, k1,
+ curr_keys);
+ keys_for_step(curve_keys,
+ curve_radius,
+ key_steps,
+ num_curve_keys,
+ num_steps,
+ step + 1,
+ k0, k1,
+ next_keys);
+ /* Interpolate between steps. */
+ r_keys[0] = (1.0f - t)*curr_keys[0] + t*next_keys[0];
+ r_keys[1] = (1.0f - t)*curr_keys[1] + t*next_keys[1];
+}
+
+void Mesh::Curve::cardinal_motion_keys(const float3 *curve_keys,
+ const float *curve_radius,
+ const float3 *key_steps,
+ size_t num_curve_keys,
+ size_t num_steps,
+ float time,
+ size_t k0, size_t k1,
+ size_t k2, size_t k3,
+ float4 r_keys[4]) const
+{
+ /* Figure out which steps we need to fetch and their interpolation factor. */
+ const size_t max_step = num_steps - 1;
+ const size_t step = min((int)(time * max_step), max_step - 1);
+ const float t = time*max_step - step;
+ /* Fetch vertex coordinates. */
+ float4 curr_keys[4];
+ float4 next_keys[4];
+ cardinal_keys_for_step(curve_keys,
+ curve_radius,
+ key_steps,
+ num_curve_keys,
+ num_steps,
+ step,
+ k0, k1, k2, k3,
+ curr_keys);
+ cardinal_keys_for_step(curve_keys,
+ curve_radius,
+ key_steps,
+ num_curve_keys,
+ num_steps,
+ step + 1,
+ k0, k1, k2, k3,
+ next_keys);
+ /* Interpolate between steps. */
+ r_keys[0] = (1.0f - t)*curr_keys[0] + t*next_keys[0];
+ r_keys[1] = (1.0f - t)*curr_keys[1] + t*next_keys[1];
+ r_keys[2] = (1.0f - t)*curr_keys[2] + t*next_keys[2];
+ r_keys[3] = (1.0f - t)*curr_keys[3] + t*next_keys[3];
+}
+
+void Mesh::Curve::keys_for_step(const float3 *curve_keys,
+ const float *curve_radius,
+ const float3 *key_steps,
+ size_t num_curve_keys,
+ size_t num_steps,
+ size_t step,
+ size_t k0, size_t k1,
+ float4 r_keys[2]) const
+{
+ k0 = max(k0, 0);
+ k1 = min(k1, num_keys - 1);
+ const size_t center_step = ((num_steps - 1) / 2);
+ if(step == center_step) {
+ /* Center step: regular key location. */
+ /* TODO(sergey): Consider adding make_float4(float3, float)
+ * function.
+ */
+ r_keys[0] = make_float4(curve_keys[first_key + k0].x,
+ curve_keys[first_key + k0].y,
+ curve_keys[first_key + k0].z,
+ curve_radius[first_key + k0]);
+ r_keys[1] = make_float4(curve_keys[first_key + k1].x,
+ curve_keys[first_key + k1].y,
+ curve_keys[first_key + k1].z,
+ curve_radius[first_key + k1]);
+ }
+ else {
+ /* Center step is not stored in this array. */
+ if(step > center_step) {
+ step--;
+ }
+ const size_t offset = first_key + step * num_curve_keys;
+ r_keys[0] = make_float4(key_steps[offset + k0].x,
+ key_steps[offset + k0].y,
+ key_steps[offset + k0].z,
+ curve_radius[first_key + k0]);
+ r_keys[1] = make_float4(key_steps[offset + k1].x,
+ key_steps[offset + k1].y,
+ key_steps[offset + k1].z,
+ curve_radius[first_key + k1]);
+ }
+}
+
+void Mesh::Curve::cardinal_keys_for_step(const float3 *curve_keys,
+ const float *curve_radius,
+ const float3 *key_steps,
+ size_t num_curve_keys,
+ size_t num_steps,
+ size_t step,
+ size_t k0, size_t k1,
+ size_t k2, size_t k3,
+ float4 r_keys[4]) const
+{
+ k0 = max(k0, 0);
+ k3 = min(k3, num_keys - 1);
+ const size_t center_step = ((num_steps - 1) / 2);
+ if(step == center_step) {
+ /* Center step: regular key location. */
+ r_keys[0] = make_float4(curve_keys[first_key + k0].x,
+ curve_keys[first_key + k0].y,
+ curve_keys[first_key + k0].z,
+ curve_radius[first_key + k0]);
+ r_keys[1] = make_float4(curve_keys[first_key + k1].x,
+ curve_keys[first_key + k1].y,
+ curve_keys[first_key + k1].z,
+ curve_radius[first_key + k1]);
+ r_keys[2] = make_float4(curve_keys[first_key + k2].x,
+ curve_keys[first_key + k2].y,
+ curve_keys[first_key + k2].z,
+ curve_radius[first_key + k2]);
+ r_keys[3] = make_float4(curve_keys[first_key + k3].x,
+ curve_keys[first_key + k3].y,
+ curve_keys[first_key + k3].z,
+ curve_radius[first_key + k3]);
+ }
+ else {
+ /* Center step is not stored in this array. */
+ if(step > center_step) {
+ step--;
+ }
+ const size_t offset = first_key + step * num_curve_keys;
+ r_keys[0] = make_float4(key_steps[offset + k0].x,
+ key_steps[offset + k0].y,
+ key_steps[offset + k0].z,
+ curve_radius[first_key + k0]);
+ r_keys[1] = make_float4(key_steps[offset + k1].x,
+ key_steps[offset + k1].y,
+ key_steps[offset + k1].z,
+ curve_radius[first_key + k1]);
+ r_keys[2] = make_float4(key_steps[offset + k2].x,
+ key_steps[offset + k2].y,
+ key_steps[offset + k2].z,
+ curve_radius[first_key + k2]);
+ r_keys[3] = make_float4(key_steps[offset + k3].x,
+ key_steps[offset + k3].y,
+ key_steps[offset + k3].z,
+ curve_radius[first_key + k3]);
+ }
+}
+
/* SubdFace */
float3 Mesh::SubdFace::normal(const Mesh *mesh) const
@@ -394,7 +651,7 @@ void Mesh::compute_bounds()
if(use_motion_blur && attr) {
size_t steps_size = verts.size() * (motion_steps - 1);
float3 *vert_steps = attr->data_float3();
-
+
for(size_t i = 0; i < steps_size; i++)
bnds.grow(vert_steps[i]);
}
@@ -403,7 +660,7 @@ void Mesh::compute_bounds()
if(use_motion_blur && curve_attr) {
size_t steps_size = curve_keys.size() * (motion_steps - 1);
float3 *key_steps = curve_attr->data_float3();
-
+
for(size_t i = 0; i < steps_size; i++)
bnds.grow(key_steps[i]);
}
@@ -417,11 +674,11 @@ void Mesh::compute_bounds()
for(size_t i = 0; i < curve_keys_size; i++)
bnds.grow_safe(curve_keys[i], curve_radius[i]);
-
+
if(use_motion_blur && attr) {
size_t steps_size = verts.size() * (motion_steps - 1);
float3 *vert_steps = attr->data_float3();
-
+
for(size_t i = 0; i < steps_size; i++)
bnds.grow_safe(vert_steps[i]);
}
@@ -429,7 +686,7 @@ void Mesh::compute_bounds()
if(use_motion_blur && curve_attr) {
size_t steps_size = curve_keys.size() * (motion_steps - 1);
float3 *key_steps = curve_attr->data_float3();
-
+
for(size_t i = 0; i < steps_size; i++)
bnds.grow_safe(key_steps[i]);
}
@@ -464,7 +721,7 @@ void Mesh::add_face_normals()
/* don't compute if already there */
if(attributes.find(ATTR_STD_FACE_NORMAL))
return;
-
+
/* get attributes */
Attribute *attr_fN = attributes.add(ATTR_STD_FACE_NORMAL);
float3 *fN = attr_fN->data_float3();
@@ -796,6 +1053,8 @@ void Mesh::compute_bvh(DeviceScene *dscene,
bparams.use_qbvh = params->use_qbvh;
bparams.use_unaligned_nodes = dscene->data.bvh.have_curves &&
params->use_bvh_unaligned_nodes;
+ bparams.num_motion_triangle_steps = params->num_bvh_time_steps;
+ bparams.num_motion_curve_steps = params->num_bvh_time_steps;
delete bvh;
bvh = BVH::create(bparams, objects);
@@ -1002,7 +1261,7 @@ void MeshManager::update_svm_attributes(Device *device, DeviceScene *dscene, Sce
if(attr_map_stride == 0)
return;
-
+
/* create attribute map */
uint4 *attr_map = dscene->attributes_map.resize(attr_map_stride*scene->objects.size());
memset(attr_map, 0, dscene->attributes_map.size()*sizeof(uint));
@@ -1084,7 +1343,7 @@ void MeshManager::update_svm_attributes(Device *device, DeviceScene *dscene, Sce
}
/* terminator */
- for(int i = 0; i < ATTR_PRIM_TYPES; i++) {
+ for(int j = 0; j < ATTR_PRIM_TYPES; j++) {
attr_map[index].x = ATTR_STD_NONE;
attr_map[index].y = 0;
attr_map[index].z = 0;
@@ -1564,6 +1823,8 @@ void MeshManager::device_update_bvh(Device *device, DeviceScene *dscene, Scene *
bparams.use_spatial_split = scene->params.use_bvh_spatial_split;
bparams.use_unaligned_nodes = dscene->data.bvh.have_curves &&
scene->params.use_bvh_unaligned_nodes;
+ bparams.num_motion_triangle_steps = scene->params.num_bvh_time_steps;
+ bparams.num_motion_curve_steps = scene->params.num_bvh_time_steps;
delete bvh;
bvh = BVH::create(bparams, scene->objects);
@@ -1665,6 +1926,7 @@ void MeshManager::device_update_displacement_images(Device *device,
*/
image_manager->device_update(device,
dscene,
+ scene,
progress);
return;
}
@@ -1682,6 +1944,7 @@ void MeshManager::device_update_displacement_images(Device *device,
image_manager,
device,
dscene,
+ scene,
slot,
&progress));
}
@@ -1944,14 +2207,14 @@ bool Mesh::need_attribute(Scene *scene, AttributeStandard std)
{
if(std == ATTR_STD_NONE)
return false;
-
+
if(scene->need_global_attribute(std))
return true;
foreach(Shader *shader, used_shaders)
if(shader->attributes.find(std))
return true;
-
+
return false;
}
@@ -1963,9 +2226,8 @@ bool Mesh::need_attribute(Scene * /*scene*/, ustring name)
foreach(Shader *shader, used_shaders)
if(shader->attributes.find(name))
return true;
-
+
return false;
}
CCL_NAMESPACE_END
-
diff --git a/intern/cycles/render/mesh.h b/intern/cycles/render/mesh.h
index c0310f4..5f33e30 100644
--- a/intern/cycles/render/mesh.h
+++ b/intern/cycles/render/mesh.h
@@ -31,6 +31,7 @@
CCL_NAMESPACE_BEGIN
+class Attribute;
class BVH;
class Device;
class DeviceScene;
@@ -54,11 +55,27 @@ public:
int v[3];
void bounds_grow(const float3 *verts, BoundBox& bounds) const;
+
+ void motion_verts(const float3 *verts,
+ const float3 *vert_steps,
+ size_t num_verts,
+ size_t num_steps,
+ float time,
+ float3 r_verts[3]) const;
+
+ void verts_for_step(const float3 *verts,
+ const float3 *vert_steps,
+ size_t num_verts,
+ size_t num_steps,
+ size_t step,
+ float3 r_verts[3]) const;
};
Triangle get_triangle(size_t i) const
{
- Triangle tri = {{triangles[i*3 + 0], triangles[i*3 + 1], triangles[i*3 + 2]}};
+ Triangle tri = {{triangles[i*3 + 0],
+ triangles[i*3 + 1],
+ triangles[i*3 + 2]}};
return tri;
}
@@ -78,11 +95,48 @@ public:
const float3 *curve_keys,
const float *curve_radius,
BoundBox& bounds) const;
+ void bounds_grow(float4 keys[4], BoundBox& bounds) const;
void bounds_grow(const int k,
const float3 *curve_keys,
const float *curve_radius,
const Transform& aligned_space,
BoundBox& bounds) const;
+
+ void motion_keys(const float3 *curve_keys,
+ const float *curve_radius,
+ const float3 *key_steps,
+ size_t num_curve_keys,
+ size_t num_steps,
+ float time,
+ size_t k0, size_t k1,
+ float4 r_keys[2]) const;
+ void cardinal_motion_keys(const float3 *curve_keys,
+ const float *curve_radius,
+ const float3 *key_steps,
+ size_t num_curve_keys,
+ size_t num_steps,
+ float time,
+ size_t k0, size_t k1,
+ size_t k2, size_t k3,
+ float4 r_keys[4]) const;
+
+ void keys_for_step(const float3 *curve_keys,
+ const float *curve_radius,
+ const float3 *key_steps,
+ size_t num_curve_keys,
+ size_t num_steps,
+ size_t step,
+ size_t k0, size_t k1,
+ float4 r_keys[2]) const;
+ void cardinal_keys_for_step(const float3 *curve_keys,
+ const float *curve_radius,
+ const float3 *key_steps,
+ size_t num_curve_keys,
+ size_t num_steps,
+ size_t step,
+ size_t k0, size_t k1,
+ size_t k2, size_t k3,
+ float4 r_keys[4]) const;
};
Curve get_curve(size_t i) const
diff --git a/intern/cycles/render/mesh_subdivision.cpp b/intern/cycles/render/mesh_subdivision.cpp
index 913c3c7..57c76a9 100644
--- a/intern/cycles/render/mesh_subdivision.cpp
+++ b/intern/cycles/render/mesh_subdivision.cpp
@@ -92,7 +92,7 @@ namespace Far {
if(vert_edges.size() == 2) {
float sharpness = refiner.getLevel(0).getEdgeSharpness(vert_edges[0]);
- sharpness = min(sharpness, refiner.getLevel(0).getEdgeSharpness(vert_edges[1]));
+ sharpness = ccl::min(sharpness, refiner.getLevel(0).getEdgeSharpness(vert_edges[1]));
setBaseVertexSharpness(refiner, i, sharpness);
}
diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp
index 405b31e..1e4a9fd 100644
--- a/intern/cycles/render/nodes.cpp
+++ b/intern/cycles/render/nodes.cpp
@@ -1257,6 +1257,7 @@ NODE_DEFINE(BrickTextureNode)
SOCKET_IN_COLOR(mortar, "Mortar", make_float3(0.0f, 0.0f, 0.0f));
SOCKET_IN_FLOAT(scale, "Scale", 5.0f);
SOCKET_IN_FLOAT(mortar_size, "Mortar Size", 0.02f);
+ SOCKET_IN_FLOAT(mortar_smooth, "Mortar Smooth", 0.0f);
SOCKET_IN_FLOAT(bias, "Bias", 0.0f);
SOCKET_IN_FLOAT(brick_width, "Brick Width", 0.5f);
SOCKET_IN_FLOAT(row_height, "Row Height", 0.25f);
@@ -1280,6 +1281,7 @@ void BrickTextureNode::compile(SVMCompiler& compiler)
ShaderInput *mortar_in = input("Mortar");
ShaderInput *scale_in = input("Scale");
ShaderInput *mortar_size_in = input("Mortar Size");
+ ShaderInput *mortar_smooth_in = input("Mortar Smooth");
ShaderInput *bias_in = input("Bias");
ShaderInput *brick_width_in = input("Brick Width");
ShaderInput *row_height_in = input("Row Height");
@@ -1303,7 +1305,8 @@ void BrickTextureNode::compile(SVMCompiler& compiler)
compiler.encode_uchar4(
compiler.stack_assign_if_linked(row_height_in),
compiler.stack_assign_if_linked(color_out),
- compiler.stack_assign_if_linked(fac_out)));
+ compiler.stack_assign_if_linked(fac_out),
+ compiler.stack_assign_if_linked(mortar_smooth_in)));
compiler.add_node(compiler.encode_uchar4(offset_frequency, squash_frequency),
__float_as_int(scale),
@@ -1315,6 +1318,11 @@ void BrickTextureNode::compile(SVMCompiler& compiler)
__float_as_int(offset),
__float_as_int(squash));
+ compiler.add_node(__float_as_int(mortar_smooth),
+ SVM_STACK_INVALID,
+ SVM_STACK_INVALID,
+ SVM_STACK_INVALID);
+
tex_mapping.compile_end(compiler, vector_in, vector_offset);
}
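
The new Mortar Smooth input gets a stack slot in the last encode_uchar4 above, and its unconnected default is appended in one extra SVM node via __float_as_int; the kernel side presumably reads it back with the inverse cast. A hedged sketch of that bit-level round trip (these free functions are illustrative stand-ins, not the kernel's own helpers):

#include <cstdint>
#include <cstring>

static uint32_t float_as_uint(float f)    { uint32_t u; std::memcpy(&u, &f, sizeof(u)); return u; }
static float    uint_as_float(uint32_t u) { float f;    std::memcpy(&f, &u, sizeof(f)); return f; }

/* The float's bit pattern is stored untouched, so
 * uint_as_float(float_as_uint(0.02f)) == 0.02f exactly. */
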
@@ -1434,14 +1442,14 @@ void PointDensityTextureNode::compile(SVMCompiler& compiler)
else {
if(use_density) {
compiler.add_node(NODE_VALUE_F,
- __float_as_int(0.0f),
- compiler.stack_assign(density_out));
+ __float_as_int(0.0f),
+ compiler.stack_assign(density_out));
}
if(use_color) {
compiler.add_node(NODE_VALUE_V, compiler.stack_assign(color_out));
compiler.add_node(NODE_VALUE_V, make_float3(TEX_IMAGE_MISSING_R,
- TEX_IMAGE_MISSING_G,
- TEX_IMAGE_MISSING_B));
+ TEX_IMAGE_MISSING_G,
+ TEX_IMAGE_MISSING_B));
}
}
}
@@ -2413,7 +2421,7 @@ void BackgroundNode::compile(SVMCompiler& compiler)
if(color_in->link || strength_in->link) {
compiler.add_node(NODE_EMISSION_WEIGHT,
compiler.stack_assign(color_in),
- compiler.stack_assign(strength_in));
+ compiler.stack_assign(strength_in));
}
else
compiler.add_node(NODE_CLOSURE_SET_WEIGHT, color*strength);
@@ -3019,6 +3027,8 @@ NODE_DEFINE(LightPathNode)
SOCKET_OUT_FLOAT(is_volume_scatter_ray, "Is Volume Scatter Ray");
SOCKET_OUT_FLOAT(ray_length, "Ray Length");
SOCKET_OUT_FLOAT(ray_depth, "Ray Depth");
+ SOCKET_OUT_FLOAT(diffuse_depth, "Diffuse Depth");
+ SOCKET_OUT_FLOAT(glossy_depth, "Glossy Depth");
SOCKET_OUT_FLOAT(transparent_depth, "Transparent Depth");
SOCKET_OUT_FLOAT(transmission_depth, "Transmission Depth");
@@ -3085,6 +3095,16 @@ void LightPathNode::compile(SVMCompiler& compiler)
compiler.add_node(NODE_LIGHT_PATH, NODE_LP_ray_depth, compiler.stack_assign(out));
}
+ out = output("Diffuse Depth");
+ if(!out->links.empty()) {
+ compiler.add_node(NODE_LIGHT_PATH, NODE_LP_ray_diffuse, compiler.stack_assign(out));
+ }
+
+ out = output("Glossy Depth");
+ if(!out->links.empty()) {
+ compiler.add_node(NODE_LIGHT_PATH, NODE_LP_ray_glossy, compiler.stack_assign(out));
+ }
+
out = output("Transparent Depth");
if(!out->links.empty()) {
compiler.add_node(NODE_LIGHT_PATH, NODE_LP_ray_transparent, compiler.stack_assign(out));
@@ -3898,6 +3918,19 @@ void GammaNode::constant_fold(const ConstantFolder& folder)
if(folder.all_inputs_constant()) {
folder.make_constant(svm_math_gamma_color(color, gamma));
}
+ else {
+ ShaderInput *color_in = input("Color");
+ ShaderInput *gamma_in = input("Gamma");
+
+ /* 1 ^ X == X ^ 0 == 1 */
+ if(folder.is_one(color_in) || folder.is_zero(gamma_in)) {
+ folder.make_one();
+ }
+ /* X ^ 1 == X */
+ else if(folder.is_one(gamma_in)) {
+ folder.try_bypass_or_make_constant(color_in, false);
+ }
+ }
}
void GammaNode::compile(SVMCompiler& compiler)
@@ -3972,7 +4005,7 @@ NODE_DEFINE(SeparateRGBNode)
SOCKET_IN_COLOR(color, "Image", make_float3(0.0f, 0.0f, 0.0f));
- SOCKET_OUT_FLOAT(g, "R");
+ SOCKET_OUT_FLOAT(r, "R");
SOCKET_OUT_FLOAT(g, "G");
SOCKET_OUT_FLOAT(b, "B");
diff --git a/intern/cycles/render/nodes.h b/intern/cycles/render/nodes.h
index 13791c6..eb0f797 100644
--- a/intern/cycles/render/nodes.h
+++ b/intern/cycles/render/nodes.h
@@ -243,7 +243,7 @@ public:
int offset_frequency, squash_frequency;
float3 color1, color2, mortar;
- float scale, mortar_size, bias, brick_width, row_height;
+ float scale, mortar_size, mortar_smooth, bias, brick_width, row_height;
float3 vector;
virtual int get_group() { return NODE_GROUP_LEVEL_2; }
diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp
index 8b8b988..c592b62 100644
--- a/intern/cycles/render/object.cpp
+++ b/intern/cycles/render/object.cpp
@@ -166,7 +166,7 @@ void Object::apply_transform(bool apply_to_motion)
float3 c0 = transform_get_column(&tfm, 0);
float3 c1 = transform_get_column(&tfm, 1);
float3 c2 = transform_get_column(&tfm, 2);
- float scalar = pow(fabsf(dot(cross(c0, c1), c2)), 1.0f/3.0f);
+ float scalar = powf(fabsf(dot(cross(c0, c1), c2)), 1.0f/3.0f);
/* apply transform to curve keys */
for(size_t i = 0; i < mesh->curve_keys.size(); i++) {
diff --git a/intern/cycles/render/osl.cpp b/intern/cycles/render/osl.cpp
index 18a32f7..67b68e6 100644
--- a/intern/cycles/render/osl.cpp
+++ b/intern/cycles/render/osl.cpp
@@ -825,7 +825,7 @@ void OSLCompiler::parameter(ShaderNode* node, const char *name)
// OSL does not support booleans, so convert to int
const array<bool>& value = node->get_bool_array(socket);
array<int> intvalue(value.size());
- for (size_t i = 0; i < value.size(); i++)
+ for(size_t i = 0; i < value.size(); i++)
intvalue[i] = value[i];
ss->Parameter(uname, array_typedesc(TypeDesc::TypeInt, value.size()), intvalue.data());
break;
@@ -861,8 +861,7 @@ void OSLCompiler::parameter(ShaderNode* node, const char *name)
// convert to tightly packed array since float3 has padding
const array<float3>& value = node->get_float3_array(socket);
array<float> fvalue(value.size() * 3);
- for (size_t i = 0, j = 0; i < value.size(); i++)
- {
+ for(size_t i = 0, j = 0; i < value.size(); i++) {
fvalue[j++] = value[i].x;
fvalue[j++] = value[i].y;
fvalue[j++] = value[i].z;
diff --git a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp
index b341837..68124e7 100644
--- a/intern/cycles/render/scene.cpp
+++ b/intern/cycles/render/scene.cpp
@@ -187,7 +187,7 @@ void Scene::device_update(Device *device_, Progress& progress)
if(progress.get_cancel() || device->have_error()) return;
progress.set_status("Updating Images");
- image_manager->device_update(device, &dscene, progress);
+ image_manager->device_update(device, &dscene, this, progress);
if(progress.get_cancel() || device->have_error()) return;
diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h
index 8fec171..8768682 100644
--- a/intern/cycles/render/scene.h
+++ b/intern/cycles/render/scene.h
@@ -143,8 +143,10 @@ public:
} bvh_type;
bool use_bvh_spatial_split;
bool use_bvh_unaligned_nodes;
+ int num_bvh_time_steps;
bool use_qbvh;
bool persistent_data;
+ int texture_limit;
SceneParams()
{
@@ -152,8 +154,10 @@ public:
bvh_type = BVH_DYNAMIC;
use_bvh_spatial_split = false;
use_bvh_unaligned_nodes = true;
+ num_bvh_time_steps = 0;
use_qbvh = false;
persistent_data = false;
+ texture_limit = 0;
}
bool modified(const SceneParams& params)
@@ -161,8 +165,10 @@ public:
&& bvh_type == params.bvh_type
&& use_bvh_spatial_split == params.use_bvh_spatial_split
&& use_bvh_unaligned_nodes == params.use_bvh_unaligned_nodes
+ && num_bvh_time_steps == params.num_bvh_time_steps
&& use_qbvh == params.use_qbvh
- && persistent_data == params.persistent_data); }
+ && persistent_data == params.persistent_data
+ && texture_limit == params.texture_limit); }
};
/* Scene */
diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
index 9d8c9fe..7c01934 100644
--- a/intern/cycles/render/session.cpp
+++ b/intern/cycles/render/session.cpp
@@ -67,10 +67,7 @@ Session::Session(const SessionParams& params_)
session_thread = NULL;
scene = NULL;
- start_time = 0.0;
reset_time = 0.0;
- preview_time = 0.0;
- paused_time = 0.0;
last_update_time = 0.0;
delayed_reset.do_reset = false;
@@ -201,12 +198,10 @@ void Session::run_gpu()
{
bool tiles_written = false;
- start_time = time_dt();
reset_time = time_dt();
- paused_time = 0.0;
last_update_time = time_dt();
- progress.set_render_start_time(start_time + paused_time);
+ progress.set_render_start_time();
while(!progress.get_cancel()) {
/* advance to next tile */
@@ -233,13 +228,9 @@ void Session::run_gpu()
update_status_time(pause, no_tiles);
while(1) {
- double pause_start = time_dt();
+ scoped_timer pause_timer;
pause_cond.wait(pause_lock);
- paused_time += time_dt() - pause_start;
-
- if(!params.background)
- progress.set_start_time(start_time + paused_time);
- progress.set_render_start_time(start_time + paused_time);
+ progress.add_skip_time(pause_timer, params.background);
update_status_time(pause, no_tiles);
progress.set_update();
@@ -255,7 +246,9 @@ void Session::run_gpu()
if(!no_tiles) {
/* update scene */
+ scoped_timer update_timer;
update_scene();
+ progress.add_skip_time(update_timer, params.background);
if(!device->error_message().empty())
progress.set_error(device->error_message());
@@ -465,6 +458,8 @@ void Session::release_tile(RenderTile& rtile)
{
thread_scoped_lock tile_lock(tile_mutex);
+ progress.add_finished_tile();
+
if(write_render_tile_cb) {
if(params.progressive_refine == false) {
/* todo: optimize this by making it thread safe and removing lock */
@@ -523,13 +518,9 @@ void Session::run_cpu()
update_status_time(pause, no_tiles);
while(1) {
- double pause_start = time_dt();
+ scoped_timer pause_timer;
pause_cond.wait(pause_lock);
- paused_time += time_dt() - pause_start;
-
- if(!params.background)
- progress.set_start_time(start_time + paused_time);
- progress.set_render_start_time(start_time + paused_time);
+ progress.add_skip_time(pause_timer, params.background);
update_status_time(pause, no_tiles);
progress.set_update();
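
The pause and scene-update bookkeeping now relies on a scoped timer whose elapsed time is handed to Progress::add_skip_time(), so that paused and update time can be excluded from the reported render time (replacing the old paused_time/start_time arithmetic). A minimal sketch of a timer of that shape (illustrative only; the real scoped_timer lives in util, and time_dt() is the existing Cycles helper):

struct scoped_timer_sketch {
	double start;
	scoped_timer_sketch() : start(time_dt()) {}
	double elapsed() const { return time_dt() - start; }  /* seconds since construction */
};
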
@@ -550,7 +541,9 @@ void Session::run_cpu()
thread_scoped_lock buffers_lock(buffers_mutex);
/* update scene */
+ scoped_timer update_timer;
update_scene();
+ progress.add_skip_time(update_timer, params.background);
if(!device->error_message().empty())
progress.set_error(device->error_message());
@@ -645,6 +638,7 @@ DeviceRequestedFeatures Session::get_requested_device_features()
BakeManager *bake_manager = scene->bake_manager;
requested_features.use_baking = bake_manager->get_baking();
requested_features.use_integrator_branched = (scene->integrator->method == Integrator::BRANCHED_PATH);
+ requested_features.use_transparent &= scene->integrator->transparent_shadows;
return requested_features;
}
@@ -718,14 +712,14 @@ void Session::reset_(BufferParams& buffer_params, int samples)
}
tile_manager.reset(buffer_params, samples);
+ progress.reset_sample();
- start_time = time_dt();
- preview_time = 0.0;
- paused_time = 0.0;
+ bool show_progress = params.background || tile_manager.get_num_effective_samples() != INT_MAX;
+ progress.set_total_pixel_samples(show_progress? tile_manager.state.total_pixel_samples : 0);
if(!params.background)
- progress.set_start_time(start_time);
- progress.set_render_start_time(start_time);
+ progress.set_start_time();
+ progress.set_render_start_time();
}
void Session::reset(BufferParams& buffer_params, int samples)
@@ -827,61 +821,40 @@ void Session::update_scene()
void Session::update_status_time(bool show_pause, bool show_done)
{
- int sample = tile_manager.state.sample;
- int resolution = tile_manager.state.resolution_divider;
- int num_tiles = tile_manager.state.num_tiles;
+ int progressive_sample = tile_manager.state.sample;
+ int num_samples = tile_manager.get_num_effective_samples();
+
int tile = tile_manager.state.num_rendered_tiles;
+ int num_tiles = tile_manager.state.num_tiles;
/* update status */
string status, substatus;
if(!params.progressive) {
- const int progress_sample = progress.get_sample(),
- num_samples = tile_manager.get_num_effective_samples();
- const bool is_gpu = params.device.type == DEVICE_CUDA || params.device.type == DEVICE_OPENCL;
- const bool is_multidevice = params.device.multi_devices.size() > 1;
const bool is_cpu = params.device.type == DEVICE_CPU;
- const bool is_last_tile = (num_samples * num_tiles - progress_sample) < num_samples;
+ const bool is_last_tile = (progress.get_finished_tiles() + 1) == num_tiles;
substatus = string_printf("Path Tracing Tile %d/%d", tile, num_tiles);
- if((is_gpu && !is_multidevice && !device->info.use_split_kernel) ||
- (is_cpu && (num_tiles == 1 || is_last_tile)))
- {
- /* When using split-kernel (OpenCL) each thread in a tile will be working on a different
- * sample. Can't display sample number when device uses split-kernel
- */
-
- /* when rendering on GPU multithreading happens within single tile, as in
- * tiles are handling sequentially and in this case we could display
- * currently rendering sample number
- * this helps a lot from feedback point of view.
- * also display the info on CPU, when using 1 tile only
+ if(device->show_samples() || (is_cpu && is_last_tile)) {
+ /* Some devices automatically support showing the sample number:
+ * - CUDADevice
+ * - OpenCLDevice when using the megakernel (the split kernel renders multiple
+ * samples at the same time, so the current sample isn't really defined)
+ * - CPUDevice when using one thread
+ * For these devices, the current sample is always shown.
+ *
+ * The other option is when the last tile is currently being rendered by the CPU.
*/
-
- int status_sample = progress_sample;
- if(tile > 1) {
- /* sample counter is global for all tiles, subtract samples
- * from already finished tiles to get sample counter for
- * current tile only
- */
- if(is_cpu && is_last_tile && num_tiles > 1) {
- status_sample = num_samples - (num_samples * num_tiles - progress_sample);
- }
- else {
- status_sample -= (tile - 1) * num_samples;
- }
- }
-
- substatus += string_printf(", Sample %d/%d", status_sample, num_samples);
+ substatus += string_printf(", Sample %d/%d", progress.get_current_sample(), num_samples);
}
}
else if(tile_manager.num_samples == INT_MAX)
- substatus = string_printf("Path Tracing Sample %d", sample+1);
+ substatus = string_printf("Path Tracing Sample %d", progressive_sample+1);
else
substatus = string_printf("Path Tracing Sample %d/%d",
- sample+1,
- tile_manager.get_num_effective_samples());
+ progressive_sample+1,
+ num_samples);
if(show_pause) {
status = "Paused";
@@ -895,22 +868,6 @@ void Session::update_status_time(bool show_pause, bool show_done)
}
progress.set_status(status, substatus);
-
- /* update timing */
- if(preview_time == 0.0 && resolution == 1)
- preview_time = time_dt();
-
- double tile_time = (tile == 0 || sample == 0)? 0.0: (time_dt() - preview_time - paused_time) / sample;
-
- /* negative can happen when we pause a bit before rendering, can discard that */
- if(preview_time < 0.0) preview_time = 0.0;
-
- progress.set_tile(tile, tile_time);
-}
-
-void Session::update_progress_sample()
-{
- progress.increment_sample();
}
void Session::path_trace()
@@ -922,7 +879,7 @@ void Session::path_trace()
task.release_tile = function_bind(&Session::release_tile, this, _1);
task.get_cancel = function_bind(&Progress::get_cancel, &this->progress);
task.update_tile_sample = function_bind(&Session::update_tile_sample, this, _1);
- task.update_progress_sample = function_bind(&Session::update_progress_sample, this);
+ task.update_progress_sample = function_bind(&Progress::add_samples, &this->progress, _1, _2);
task.need_finish_queue = params.progressive_refine;
task.integrator_branched = scene->integrator->method == Integrator::BRANCHED_PATH;
task.requested_tile_size = params.tile_size;
diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h
index 8bff0f9..c7ff144 100644
--- a/intern/cycles/render/session.h
+++ b/intern/cycles/render/session.h
@@ -89,8 +89,7 @@ public:
}
bool modified(const SessionParams& params)
- { return !(device.type == params.device.type
- && device.id == params.device.id
+ { return !(device == params.device
&& background == params.background
&& progressive_refine == params.progressive_refine
&& output_path == params.output_path
@@ -146,6 +145,10 @@ public:
void device_free();
+ /* Returns the rendering progress or 0 if no progress can be determined
+ * (for example, when rendering with unlimited samples). */
+ float get_progress();
+
protected:
struct DelayedReset {
thread_mutex mutex;
@@ -174,8 +177,6 @@ protected:
void update_tile_sample(RenderTile& tile);
void release_tile(RenderTile& tile);
- void update_progress_sample();
-
bool device_use_gl;
thread *session_thread;
@@ -195,10 +196,7 @@ protected:
bool kernels_loaded;
- double start_time;
double reset_time;
- double preview_time;
- double paused_time;
/* progressive refine */
double last_update_time;
diff --git a/intern/cycles/render/shader.cpp b/intern/cycles/render/shader.cpp
index 70e1443..335edcb 100644
--- a/intern/cycles/render/shader.cpp
+++ b/intern/cycles/render/shader.cpp
@@ -194,6 +194,28 @@ Shader::~Shader()
delete graph_bump;
}
+bool Shader::is_constant_emission(float3 *emission)
+{
+ ShaderInput *surf = graph->output()->input("Surface");
+
+ if(!surf->link || surf->link->parent->type != EmissionNode::node_type) {
+ return false;
+ }
+
+ EmissionNode *node = (EmissionNode*) surf->link->parent;
+
+ assert(node->input("Color"));
+ assert(node->input("Strength"));
+
+ if(node->input("Color")->link || node->input("Strength")->link) {
+ return false;
+ }
+
+ *emission = node->color*node->strength;
+
+ return true;
+}
+
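A short illustration of what the new is_constant_emission() accepts and rejects (example node setups, not taken from the patch):

/* Accepted:  Emission(Color = (1, 0, 0), Strength = 5) -> Output.Surface
 *            -> returns true and writes *emission = (5, 0, 0).
 * Rejected:  Emission with Color or Strength driven by another node (linked
 *            socket), or any non-emission closure on the Surface output
 *            -> returns false and the shader is evaluated normally. */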
void Shader::set_graph(ShaderGraph *graph_)
{
/* do this here already so that we can detect if mesh or object attributes
@@ -379,7 +401,7 @@ void ShaderManager::device_update_common(Device *device,
if(scene->shaders.size() == 0)
return;
- uint shader_flag_size = scene->shaders.size()*2;
+ uint shader_flag_size = scene->shaders.size()*SHADER_SIZE;
uint *shader_flag = dscene->shader_flag.resize(shader_flag_size);
uint i = 0;
bool has_volumes = false;
@@ -424,9 +446,17 @@ void ShaderManager::device_update_common(Device *device,
if(shader->displacement_method != DISPLACE_TRUE && shader->graph_bump)
flag |= SD_HAS_BSSRDF_BUMP;
+ /* constant emission check */
+ float3 constant_emission = make_float3(0.0f, 0.0f, 0.0f);
+ if(shader->is_constant_emission(&constant_emission))
+ flag |= SD_HAS_CONSTANT_EMISSION;
+
/* regular shader */
shader_flag[i++] = flag;
shader_flag[i++] = shader->pass_id;
+ shader_flag[i++] = __float_as_int(constant_emission.x);
+ shader_flag[i++] = __float_as_int(constant_emission.y);
+ shader_flag[i++] = __float_as_int(constant_emission.z);
has_transparent_shadow |= (flag & SD_HAS_TRANSPARENT_SHADOW) != 0;
}
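With this change every shader occupies SHADER_SIZE consecutive uints in shader_flag instead of two; judging by the five values written per shader above, SHADER_SIZE is presumably 5. A hedged sketch of how the packed entry would be read back (host-side view; the kernel-side accessor is not part of this hunk):

/* Assumed layout per shader: flag, pass_id, emission.x, emission.y, emission.z */
uint *entry = &shader_flag[shader_index*SHADER_SIZE];
uint flag       = entry[0];
uint pass_id    = entry[1];
float3 emission = make_float3(__int_as_float(entry[2]),
                              __int_as_float(entry[3]),
                              __int_as_float(entry[4]));
/* emission is only meaningful when (flag & SD_HAS_CONSTANT_EMISSION) is set. */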
@@ -541,6 +571,9 @@ void ShaderManager::get_requested_graph_features(ShaderGraph *graph,
if(node->has_surface_bssrdf()) {
requested_features->use_subsurface = true;
}
+ if(node->has_surface_transparent()) {
+ requested_features->use_transparent = true;
+ }
}
}
diff --git a/intern/cycles/render/shader.h b/intern/cycles/render/shader.h
index 696e22b..7d89665 100644
--- a/intern/cycles/render/shader.h
+++ b/intern/cycles/render/shader.h
@@ -139,6 +139,10 @@ public:
Shader();
~Shader();
+ /* Checks whether the shader consists of just an emission node with fixed inputs that is connected directly to the output.
+ * If so, emission is set to the constant value (color * strength), which is then used to speed up light evaluation. */
+ bool is_constant_emission(float3* emission);
+
void set_graph(ShaderGraph *graph);
void tag_update(Scene *scene);
void tag_used(Scene *scene);
diff --git a/intern/cycles/render/svm.cpp b/intern/cycles/render/svm.cpp
index 0332106..955b892 100644
--- a/intern/cycles/render/svm.cpp
+++ b/intern/cycles/render/svm.cpp
@@ -27,6 +27,7 @@
#include "util_logging.h"
#include "util_foreach.h"
#include "util_progress.h"
+#include "util_task.h"
CCL_NAMESPACE_BEGIN
@@ -44,6 +45,51 @@ void SVMShaderManager::reset(Scene * /*scene*/)
{
}
+void SVMShaderManager::device_update_shader(Scene *scene,
+ Shader *shader,
+ Progress *progress,
+ vector<int4> *global_svm_nodes)
+{
+ if(progress->get_cancel()) {
+ return;
+ }
+ assert(shader->graph);
+
+ vector<int4> svm_nodes;
+ svm_nodes.push_back(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
+
+ SVMCompiler::Summary summary;
+ SVMCompiler compiler(scene->shader_manager, scene->image_manager);
+ compiler.background = (shader == scene->default_background);
+ compiler.compile(scene, shader, svm_nodes, 0, &summary);
+
+ VLOG(2) << "Compilation summary:\n"
+ << "Shader name: " << shader->name << "\n"
+ << summary.full_report();
+
+ if(shader->use_mis && shader->has_surface_emission) {
+ scene->light_manager->need_update = true;
+ }
+
+ /* The copy needs to be done inside the lock: if another thread resizes the array
+ * while memcpy is running, it would be copying into possibly invalid/freed memory.
+ */
+ nodes_lock_.lock();
+ size_t global_nodes_size = global_svm_nodes->size();
+ global_svm_nodes->resize(global_nodes_size + svm_nodes.size());
+
+ /* Offset local SVM nodes to a global address space. */
+ int4& jump_node = global_svm_nodes->at(shader->id);
+ jump_node.y = svm_nodes[0].y + global_nodes_size - 1;
+ jump_node.z = svm_nodes[0].z + global_nodes_size - 1;
+ jump_node.w = svm_nodes[0].w + global_nodes_size - 1;
+ /* Copy new nodes to global storage. */
+ memcpy(&global_svm_nodes->at(global_nodes_size),
+ &svm_nodes[1],
+ sizeof(int4) * (svm_nodes.size() - 1));
+ nodes_lock_.unlock();
+}
+
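The offset fixup above is the subtle part of compiling shaders in parallel: each shader's placeholder NODE_SHADER_JUMP already sits at global index shader->id, while the locally compiled nodes start at local index 1 and are appended at the end of the shared array. A worked example of the arithmetic:

/* Suppose global_svm_nodes currently holds N = 100 entries and this shader's
 * local compile produced 61 nodes, with
 *   svm_nodes[0] = NODE_SHADER_JUMP { y = 1, z = 25, w = 40 }   (local indices).
 * memcpy() appends local nodes 1..60 to global indices 100..159, i.e. local
 * index k lands at global index N + k - 1.  Applying the same shift to the
 * jump targets gives
 *   y = 1  + 100 - 1 = 100,   z = 25 + 100 - 1 = 124,   w = 40 + 100 - 1 = 139,
 * which is exactly what jump_node.y/z/w are set to before the copy. */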
void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress)
{
if(!need_update)
@@ -51,6 +97,8 @@ void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene
VLOG(1) << "Total " << scene->shaders.size() << " shaders.";
+ double start_time = time_dt();
+
/* test if we need to update */
device_free(device, dscene, scene);
@@ -65,23 +113,20 @@ void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene
svm_nodes.push_back(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
}
+ TaskPool task_pool;
foreach(Shader *shader, scene->shaders) {
- if(progress.get_cancel()) return;
-
- assert(shader->graph);
-
- SVMCompiler::Summary summary;
- SVMCompiler compiler(scene->shader_manager, scene->image_manager);
- compiler.background = (shader == scene->default_background);
- compiler.compile(scene, shader, svm_nodes, shader->id, &summary);
-
- if(shader->use_mis && shader->has_surface_emission) {
- scene->light_manager->need_update = true;
- }
+ task_pool.push(function_bind(&SVMShaderManager::device_update_shader,
+ this,
+ scene,
+ shader,
+ &progress,
+ &svm_nodes),
+ false);
+ }
+ task_pool.wait_work();
- VLOG(2) << "Compilation summary:\n"
- << "Shader name: " << shader->name << "\n"
- << summary.full_report();
+ if(progress.get_cancel()) {
+ return;
}
dscene->svm_nodes.copy((uint4*)&svm_nodes[0], svm_nodes.size());
@@ -95,6 +140,10 @@ void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene
device_update_common(device, dscene, scene, progress);
need_update = false;
+
+ VLOG(1) << "Shader manager updated "
+ << scene->shaders.size() << " shaders in "
+ << time_dt() - start_time << " seconds.";
}
void SVMShaderManager::device_free(Device *device, DeviceScene *dscene, Scene *scene)
@@ -317,17 +366,17 @@ uint SVMCompiler::encode_uchar4(uint x, uint y, uint z, uint w)
void SVMCompiler::add_node(int a, int b, int c, int d)
{
- svm_nodes.push_back(make_int4(a, b, c, d));
+ current_svm_nodes.push_back(make_int4(a, b, c, d));
}
void SVMCompiler::add_node(ShaderNodeType type, int a, int b, int c)
{
- svm_nodes.push_back(make_int4(type, a, b, c));
+ current_svm_nodes.push_back(make_int4(type, a, b, c));
}
void SVMCompiler::add_node(ShaderNodeType type, const float3& f)
{
- svm_nodes.push_back(make_int4(type,
+ current_svm_nodes.push_back(make_int4(type,
__float_as_int(f.x),
__float_as_int(f.y),
__float_as_int(f.z)));
@@ -335,7 +384,7 @@ void SVMCompiler::add_node(ShaderNodeType type, const float3& f)
void SVMCompiler::add_node(const float4& f)
{
- svm_nodes.push_back(make_int4(
+ current_svm_nodes.push_back(make_int4(
__float_as_int(f.x),
__float_as_int(f.y),
__float_as_int(f.z),
@@ -572,26 +621,38 @@ void SVMCompiler::generate_multi_closure(ShaderNode *root_node,
/* generate instructions for input closure 1 */
if(cl1in->link) {
- /* add instruction to skip closure and its dependencies if mix weight is zero */
- svm_nodes.push_back(make_int4(NODE_JUMP_IF_ONE, 0, stack_assign(facin), 0));
- int node_jump_skip_index = svm_nodes.size() - 1;
+ /* Add instruction to skip closure and its dependencies if mix
+ * weight is zero.
+ */
+ current_svm_nodes.push_back(make_int4(NODE_JUMP_IF_ONE,
+ 0,
+ stack_assign(facin),
+ 0));
+ int node_jump_skip_index = current_svm_nodes.size() - 1;
generate_multi_closure(root_node, cl1in->link->parent, state);
- /* fill in jump instruction location to be after closure */
- svm_nodes[node_jump_skip_index].y = svm_nodes.size() - node_jump_skip_index - 1;
+ /* Fill in jump instruction location to be after closure. */
+ current_svm_nodes[node_jump_skip_index].y =
+ current_svm_nodes.size() - node_jump_skip_index - 1;
}
/* generate instructions for input closure 2 */
if(cl2in->link) {
- /* add instruction to skip closure and its dependencies if mix weight is zero */
- svm_nodes.push_back(make_int4(NODE_JUMP_IF_ZERO, 0, stack_assign(facin), 0));
- int node_jump_skip_index = svm_nodes.size() - 1;
+ /* Add instruction to skip closure and its dependencies if mix
+ * weight is zero.
+ */
+ current_svm_nodes.push_back(make_int4(NODE_JUMP_IF_ZERO,
+ 0,
+ stack_assign(facin),
+ 0));
+ int node_jump_skip_index = current_svm_nodes.size() - 1;
generate_multi_closure(root_node, cl2in->link->parent, state);
- /* fill in jump instruction location to be after closure */
- svm_nodes[node_jump_skip_index].y = svm_nodes.size() - node_jump_skip_index - 1;
+ /* Fill in jump instruction location to be after closure. */
+ current_svm_nodes[node_jump_skip_index].y =
+ current_svm_nodes.size() - node_jump_skip_index - 1;
}
/* unassign */
@@ -661,7 +722,7 @@ void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType ty
/* clear all compiler state */
memset(&active_stack, 0, sizeof(active_stack));
- svm_nodes.clear();
+ current_svm_nodes.clear();
foreach(ShaderNode *node_iter, graph->nodes) {
foreach(ShaderInput *input, node_iter->inputs)
@@ -721,7 +782,7 @@ void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType ty
/* if compile failed, generate empty shader */
if(compile_failed) {
- svm_nodes.clear();
+ current_svm_nodes.clear();
compile_failed = false;
}
@@ -733,13 +794,13 @@ void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType ty
void SVMCompiler::compile(Scene *scene,
Shader *shader,
- vector<int4>& global_svm_nodes,
+ vector<int4>& svm_nodes,
int index,
Summary *summary)
{
/* copy graph for shader with bump mapping */
ShaderNode *node = shader->graph->output();
- int start_num_svm_nodes = global_svm_nodes.size();
+ int start_num_svm_nodes = svm_nodes.size();
const double time_start = time_dt();
@@ -783,8 +844,10 @@ void SVMCompiler::compile(Scene *scene,
if(shader->displacement_method != DISPLACE_TRUE && shader->graph_bump) {
scoped_timer timer((summary != NULL)? &summary->time_generate_bump: NULL);
compile_type(shader, shader->graph_bump, SHADER_TYPE_BUMP);
- global_svm_nodes[index].y = global_svm_nodes.size();
- global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
+ svm_nodes[index].y = svm_nodes.size();
+ svm_nodes.insert(svm_nodes.end(),
+ current_svm_nodes.begin(),
+ current_svm_nodes.end());
}
/* generate surface shader */
@@ -793,32 +856,38 @@ void SVMCompiler::compile(Scene *scene,
compile_type(shader, shader->graph, SHADER_TYPE_SURFACE);
/* only set jump offset if there's no bump shader, as the bump shader will fall thru to this one if it exists */
if(shader->displacement_method == DISPLACE_TRUE || !shader->graph_bump) {
- global_svm_nodes[index].y = global_svm_nodes.size();
+ svm_nodes[index].y = svm_nodes.size();
}
- global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
+ svm_nodes.insert(svm_nodes.end(),
+ current_svm_nodes.begin(),
+ current_svm_nodes.end());
}
/* generate volume shader */
{
scoped_timer timer((summary != NULL)? &summary->time_generate_volume: NULL);
compile_type(shader, shader->graph, SHADER_TYPE_VOLUME);
- global_svm_nodes[index].z = global_svm_nodes.size();
- global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
+ svm_nodes[index].z = svm_nodes.size();
+ svm_nodes.insert(svm_nodes.end(),
+ current_svm_nodes.begin(),
+ current_svm_nodes.end());
}
/* generate displacement shader */
{
scoped_timer timer((summary != NULL)? &summary->time_generate_displacement: NULL);
compile_type(shader, shader->graph, SHADER_TYPE_DISPLACEMENT);
- global_svm_nodes[index].w = global_svm_nodes.size();
- global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
+ svm_nodes[index].w = svm_nodes.size();
+ svm_nodes.insert(svm_nodes.end(),
+ current_svm_nodes.begin(),
+ current_svm_nodes.end());
}
/* Fill in summary information. */
if(summary != NULL) {
summary->time_total = time_dt() - time_start;
summary->peak_stack_usage = max_stack_use;
- summary->num_svm_nodes = global_svm_nodes.size() - start_num_svm_nodes;
+ summary->num_svm_nodes = svm_nodes.size() - start_num_svm_nodes;
}
}
diff --git a/intern/cycles/render/svm.h b/intern/cycles/render/svm.h
index 99e91ca..a501b6b 100644
--- a/intern/cycles/render/svm.h
+++ b/intern/cycles/render/svm.h
@@ -23,6 +23,7 @@
#include "util_set.h"
#include "util_string.h"
+#include "util_thread.h"
CCL_NAMESPACE_BEGIN
@@ -46,6 +47,15 @@ public:
void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress);
void device_free(Device *device, DeviceScene *dscene, Scene *scene);
+
+protected:
+ /* Lock used to synchronize threaded nodes compilation. */
+ thread_spin_lock nodes_lock_;
+
+ void device_update_shader(Scene *scene,
+ Shader *shader,
+ Progress *progress,
+ vector<int4> *global_svm_nodes);
};
/* Graph Compiler */
@@ -200,7 +210,7 @@ protected:
/* compile */
void compile_type(Shader *shader, ShaderGraph *graph, ShaderType type);
- vector<int4> svm_nodes;
+ vector<int4> current_svm_nodes;
ShaderType current_type;
Shader *current_shader;
ShaderGraph *current_graph;
diff --git a/intern/cycles/render/tile.cpp b/intern/cycles/render/tile.cpp
index 3a6dfea..a493c3f 100644
--- a/intern/cycles/render/tile.cpp
+++ b/intern/cycles/render/tile.cpp
@@ -108,36 +108,57 @@ TileManager::~TileManager()
{
}
-void TileManager::reset(BufferParams& params_, int num_samples_)
+static int get_divider(int w, int h, int start_resolution)
{
- params = params_;
-
int divider = 1;
- int w = params.width, h = params.height;
-
if(start_resolution != INT_MAX) {
while(w*h > start_resolution*start_resolution) {
w = max(1, w/2);
h = max(1, h/2);
- divider *= 2;
+ divider <<= 1;
}
}
+ return divider;
+}
- num_samples = num_samples_;
+void TileManager::reset(BufferParams& params_, int num_samples_)
+{
+ params = params_;
+
+ set_samples(num_samples_);
state.buffer = BufferParams();
state.sample = range_start_sample - 1;
state.num_tiles = 0;
state.num_rendered_tiles = 0;
state.num_samples = 0;
- state.resolution_divider = divider;
+ state.resolution_divider = get_divider(params.width, params.height, start_resolution);
state.tiles.clear();
}
void TileManager::set_samples(int num_samples_)
{
num_samples = num_samples_;
+
+ /* No real progress indication is possible when using unlimited samples. */
+ if(num_samples == INT_MAX) {
+ state.total_pixel_samples = 0;
+ }
+ else {
+ uint64_t pixel_samples = 0;
+ /* While rendering in the viewport, the initial preview resolution is increased to the native resolution
+ * before the actual rendering begins. Therefore, additional pixel samples will be rendered. */
+ int divider = get_divider(params.width, params.height, start_resolution) / 2;
+ while(divider > 1) {
+ int image_w = max(1, params.width/divider);
+ int image_h = max(1, params.height/divider);
+ pixel_samples += image_w * image_h;
+ divider >>= 1;
+ }
+
+ state.total_pixel_samples = pixel_samples + (uint64_t)get_num_effective_samples() * params.width*params.height;
+ }
}
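The total_pixel_samples computation can be checked with concrete numbers (illustrative values; start_resolution is whatever the session was configured with):

/* Example: params.width = 1920, params.height = 1080, start_resolution = 64,
 * num_samples = 128.
 * get_divider() halves 1920x1080 until w*h <= 64*64, giving divider = 32.
 * The preview passes then start at divider 16 and add
 *   120*67 + 240*135 + 480*270 + 960*540 = 8040 + 32400 + 129600 + 518400 = 688440
 * pixel samples, and the full-resolution render adds
 *   128 * 1920*1080 = 265420800,
 * so state.total_pixel_samples = 266109240. */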
/* If sliced is false, splits image into tiles and assigns equal amount of tiles to every render device.
diff --git a/intern/cycles/render/tile.h b/intern/cycles/render/tile.h
index af1b1ed..5d92eba 100644
--- a/intern/cycles/render/tile.h
+++ b/intern/cycles/render/tile.h
@@ -64,6 +64,10 @@ public:
int resolution_divider;
int num_tiles;
int num_rendered_tiles;
+
+ /* Total samples over all pixels: Generally num_samples*num_pixels,
+ * but can be higher due to the initial resolution division for previews. */
+ uint64_t total_pixel_samples;
/* This vector contains a list of tiles for every logical device in the session.
* In each list, the tiles are sorted according to the tile order setting. */
vector<list<Tile> > tiles;
@@ -91,7 +95,7 @@ public:
/* Number of samples in the rendering range. */
int range_num_samples;
- /* get number of actual samples to render. */
+ /* Get number of actual samples to render. */
int get_num_effective_samples();
protected:
diff --git a/intern/cycles/subd/subd_patch_table.cpp b/intern/cycles/subd/subd_patch_table.cpp
index 62572ef..d437b04 100644
--- a/intern/cycles/subd/subd_patch_table.cpp
+++ b/intern/cycles/subd/subd_patch_table.cpp
@@ -46,7 +46,7 @@ struct PatchMapQuadNode {
/* sets all the children to point to the patch of index */
void set_child(int index)
{
- for (int i = 0; i < 4; i++) {
+ for(int i = 0; i < 4; i++) {
children[i] = index | PATCH_MAP_NODE_IS_SET | PATCH_MAP_NODE_IS_LEAF;
}
}
diff --git a/intern/cycles/test/render_graph_finalize_test.cpp b/intern/cycles/test/render_graph_finalize_test.cpp
index 60e41be..32b4c72 100644
--- a/intern/cycles/test/render_graph_finalize_test.cpp
+++ b/intern/cycles/test/render_graph_finalize_test.cpp
@@ -931,6 +931,72 @@ TEST(render_graph, constant_fold_gamma)
}
/*
+ * Tests: Gamma with one constant 0 input.
+ */
+TEST(render_graph, constant_fold_gamma_part_0)
+{
+ DEFINE_COMMON_VARIABLES(builder, log);
+
+ EXPECT_ANY_MESSAGE(log);
+ INVALID_INFO_MESSAGE(log, "Folding Gamma_Cx::");
+ CORRECT_INFO_MESSAGE(log, "Folding Gamma_xC::Color to constant (1, 1, 1).");
+
+ builder
+ .add_attribute("Attribute")
+ /* constant on the left */
+ .add_node(ShaderNodeBuilder<GammaNode>("Gamma_Cx")
+ .set("Color", make_float3(0.0f, 0.0f, 0.0f)))
+ .add_connection("Attribute::Fac", "Gamma_Cx::Gamma")
+ /* constant on the right */
+ .add_node(ShaderNodeBuilder<GammaNode>("Gamma_xC")
+ .set("Gamma", 0.0f))
+ .add_connection("Attribute::Color", "Gamma_xC::Color")
+ /* output sum */
+ .add_node(ShaderNodeBuilder<MixNode>("Out")
+ .set(&MixNode::type, NODE_MIX_ADD)
+ .set(&MixNode::use_clamp, true)
+ .set("Fac", 1.0f))
+ .add_connection("Gamma_Cx::Color", "Out::Color1")
+ .add_connection("Gamma_xC::Color", "Out::Color2")
+ .output_color("Out::Color");
+
+ graph.finalize(&scene);
+}
+
+/*
+ * Tests: Gamma with one constant 1 input.
+ */
+TEST(render_graph, constant_fold_gamma_part_1)
+{
+ DEFINE_COMMON_VARIABLES(builder, log);
+
+ EXPECT_ANY_MESSAGE(log);
+ CORRECT_INFO_MESSAGE(log, "Folding Gamma_Cx::Color to constant (1, 1, 1).");
+ CORRECT_INFO_MESSAGE(log, "Folding Gamma_xC::Color to socket Attribute::Color.");
+
+ builder
+ .add_attribute("Attribute")
+ /* constant on the left */
+ .add_node(ShaderNodeBuilder<GammaNode>("Gamma_Cx")
+ .set("Color", make_float3(1.0f, 1.0f, 1.0f)))
+ .add_connection("Attribute::Fac", "Gamma_Cx::Gamma")
+ /* constant on the right */
+ .add_node(ShaderNodeBuilder<GammaNode>("Gamma_xC")
+ .set("Gamma", 1.0f))
+ .add_connection("Attribute::Color", "Gamma_xC::Color")
+ /* output sum */
+ .add_node(ShaderNodeBuilder<MixNode>("Out")
+ .set(&MixNode::type, NODE_MIX_ADD)
+ .set(&MixNode::use_clamp, true)
+ .set("Fac", 1.0f))
+ .add_connection("Gamma_Cx::Color", "Out::Color1")
+ .add_connection("Gamma_xC::Color", "Out::Color2")
+ .output_color("Out::Color");
+
+ graph.finalize(&scene);
+}
+
+/*
* Tests: BrightnessContrast with all constant inputs.
*/
TEST(render_graph, constant_fold_bright_contrast)
@@ -1143,6 +1209,40 @@ TEST(render_graph, constant_fold_part_math_div_0)
}
/*
+ * Tests: partial folding for Math Power with known 0.
+ */
+TEST(render_graph, constant_fold_part_math_pow_0)
+{
+ DEFINE_COMMON_VARIABLES(builder, log);
+
+ EXPECT_ANY_MESSAGE(log);
+ /* X ^ 0 == 1 */
+ INVALID_INFO_MESSAGE(log, "Folding Math_Cx::");
+ CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to constant (1).");
+ INVALID_INFO_MESSAGE(log, "Folding Out::");
+
+ build_math_partial_test_graph(builder, NODE_MATH_POWER, 0.0f);
+ graph.finalize(&scene);
+}
+
+/*
+ * Tests: partial folding for Math Power with known 1.
+ */
+TEST(render_graph, constant_fold_part_math_pow_1)
+{
+ DEFINE_COMMON_VARIABLES(builder, log);
+
+ EXPECT_ANY_MESSAGE(log);
+ /* 1 ^ X == 1; X ^ 1 == X */
+ CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Value to constant (1)");
+ CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to socket Attribute::Fac.");
+ INVALID_INFO_MESSAGE(log, "Folding Out::");
+
+ build_math_partial_test_graph(builder, NODE_MATH_POWER, 1.0f);
+ graph.finalize(&scene);
+}
+
+/*
* Tests: Vector Math with all constant inputs.
*/
TEST(render_graph, constant_fold_vector_math)
@@ -1307,8 +1407,9 @@ void init_test_curve(array<T> &buffer, T start, T end, int steps)
{
buffer.resize(steps);
- for (int i = 0; i < steps; i++)
+ for(int i = 0; i < steps; i++) {
buffer[i] = lerp(start, end, float(i)/(steps-1));
+ }
}
/*
diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt
index f5674bd..d8abf67 100644
--- a/intern/cycles/util/CMakeLists.txt
+++ b/intern/cycles/util/CMakeLists.txt
@@ -45,6 +45,7 @@ set(SRC_HEADERS
util_half.h
util_hash.h
util_image.h
+ util_image_impl.h
util_list.h
util_logging.h
util_map.h
@@ -63,6 +64,7 @@ set(SRC_HEADERS
util_sky_model.cpp
util_sky_model.h
util_sky_model_data.h
+ util_avxf.h
util_sseb.h
util_ssef.h
util_ssei.h
diff --git a/intern/cycles/util/util_atomic.h b/intern/cycles/util/util_atomic.h
index 1d1e296..433e41f 100644
--- a/intern/cycles/util/util_atomic.h
+++ b/intern/cycles/util/util_atomic.h
@@ -39,7 +39,7 @@ ATOMIC_INLINE void atomic_update_max_z(size_t *maximum_value, size_t value)
/* Float atomics implementation credits:
* http://suhorukov.blogspot.in/2011/12/opencl-11-atomic-operations-on-floating.html
*/
-ccl_device_inline void atomic_add_float(volatile ccl_global float *source,
+ccl_device_inline void atomic_add_and_fetch_float(volatile ccl_global float *source,
const float operand)
{
union {
diff --git a/intern/cycles/util/util_avxf.h b/intern/cycles/util/util_avxf.h
new file mode 100644
index 0000000..2451213
--- /dev/null
+++ b/intern/cycles/util/util_avxf.h
@@ -0,0 +1,193 @@
+/*
+ * Copyright 2016 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0(the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_AVXF_H__
+#define __UTIL_AVXF_H__
+
+CCL_NAMESPACE_BEGIN
+
+#ifdef __KERNEL_AVX__
+struct avxf
+{
+ typedef avxf Float;
+
+ enum { size = 8 }; /* Number of SIMD elements. */
+
+ union {
+ __m256 m256;
+ float f[8];
+ int i[8];
+ };
+
+ __forceinline avxf () {}
+ __forceinline avxf (const avxf& other) { m256 = other.m256; }
+ __forceinline avxf& operator=(const avxf& other) { m256 = other.m256; return *this; }
+
+ __forceinline avxf(const __m256 a) : m256(a) {}
+ __forceinline avxf(const __m256i a) : m256(_mm256_castsi256_ps (a)) {}
+
+ __forceinline operator const __m256&(void) const { return m256; }
+ __forceinline operator __m256&(void) { return m256; }
+
+ __forceinline avxf (float a) : m256(_mm256_set1_ps(a)) {}
+
+ __forceinline avxf(float high32x4, float low32x4) :
+ m256(_mm256_set_ps(high32x4, high32x4, high32x4, high32x4, low32x4, low32x4, low32x4, low32x4)) {}
+
+ __forceinline avxf(float a3, float a2, float a1, float a0) :
+ m256(_mm256_set_ps(a3, a2, a1, a0, a3, a2, a1, a0)) {}
+
+ __forceinline avxf(float a7, float a6, float a5, float a4, float a3, float a2, float a1, float a0) :
+ m256(_mm256_set_ps(a7, a6, a5, a4, a3, a2, a1, a0)) {}
+
+
+ __forceinline avxf(int a3, int a2, int a1, int a0)
+ {
+ const __m256i foo = _mm256_set_epi32(a3, a2, a1, a0, a3, a2, a1, a0);
+ m256 = _mm256_castsi256_ps(foo);
+ }
+
+
+ __forceinline avxf(int a7, int a6, int a5, int a4, int a3, int a2, int a1, int a0)
+ {
+ const __m256i foo = _mm256_set_epi32(a7, a6, a5, a4, a3, a2, a1, a0);
+ m256 = _mm256_castsi256_ps(foo);
+ }
+
+ __forceinline avxf(__m128 a, __m128 b)
+ {
+ const __m256 foo = _mm256_castps128_ps256(a);
+ m256 = _mm256_insertf128_ps(foo, b, 1);
+ }
+
+};
+
+////////////////////////////////////////////////////////////////////////////////
+/// Unary Operators
+////////////////////////////////////////////////////////////////////////////////
+
+__forceinline const avxf mm256_sqrt(const avxf& a) { return _mm256_sqrt_ps(a.m256); }
+
+////////////////////////////////////////////////////////////////////////////////
+/// Binary Operators
+////////////////////////////////////////////////////////////////////////////////
+
+__forceinline const avxf operator +(const avxf& a, const avxf& b) { return _mm256_add_ps(a.m256, b.m256); }
+__forceinline const avxf operator +(const avxf& a, const float& b) { return a + avxf(b); }
+__forceinline const avxf operator +(const float& a, const avxf& b) { return avxf(a) + b; }
+
+__forceinline const avxf operator -(const avxf& a, const avxf& b) { return _mm256_sub_ps(a.m256, b.m256); }
+__forceinline const avxf operator -(const avxf& a, const float& b) { return a - avxf(b); }
+__forceinline const avxf operator -(const float& a, const avxf& b) { return avxf(a) - b; }
+
+__forceinline const avxf operator *(const avxf& a, const avxf& b) { return _mm256_mul_ps(a.m256, b.m256); }
+__forceinline const avxf operator *(const avxf& a, const float& b) { return a * avxf(b); }
+__forceinline const avxf operator *(const float& a, const avxf& b) { return avxf(a) * b; }
+
+__forceinline const avxf operator /(const avxf& a, const avxf& b) { return _mm256_div_ps(a.m256,b.m256); }
+__forceinline const avxf operator /(const avxf& a, const float& b) { return a/avxf(b); }
+__forceinline const avxf operator /(const float& a, const avxf& b) { return avxf(a)/b; }
+
+__forceinline const avxf operator|(const avxf& a, const avxf& b) { return _mm256_or_ps(a.m256,b.m256); }
+
+__forceinline const avxf operator^(const avxf& a, const avxf& b) { return _mm256_xor_ps(a.m256,b.m256); }
+
+__forceinline const avxf operator&(const avxf& a, const avxf& b) { return _mm256_and_ps(a.m256,b.m256); }
+
+////////////////////////////////////////////////////////////////////////////////
+/// Movement/Shifting/Shuffling Functions
+////////////////////////////////////////////////////////////////////////////////
+
+__forceinline const avxf shuffle(const avxf& a, const __m256i &shuf) {
+ return _mm256_permutevar_ps(a, shuf);
+}
+
+template<int i0, int i1, int i2, int i3, int i4, int i5, int i6, int i7> __forceinline const avxf shuffle(const avxf& a) {
+ return _mm256_permutevar_ps(a, _mm256_set_epi32( i7,i6,i5,i4 ,i3,i2,i1,i0));
+}
+
+template<size_t i0, size_t i1, size_t i2, size_t i3> __forceinline const avxf shuffle(const avxf& a, const avxf& b) {
+ return _mm256_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0));
+}
+template<size_t i0, size_t i1, size_t i2, size_t i3> __forceinline const avxf shuffle(const avxf& a) {
+ return shuffle<i0,i1,i2,i3>(a,a);
+}
+template<size_t i0> __forceinline const avxf shuffle(const avxf& a, const avxf& b) {
+ return shuffle<i0,i0,i0,i0>(a, b);
+}
+template<size_t i0> __forceinline const avxf shuffle(const avxf& a) {
+ return shuffle<i0>(a,a);
+}
+
+template<int i0, int i1, int i2, int i3, int i4, int i5, int i6, int i7> __forceinline const avxf permute(const avxf& a) {
+#ifdef __KERNEL_AVX2__
+ return _mm256_permutevar8x32_ps(a,_mm256_set_epi32( i7,i6,i5,i4 ,i3,i2,i1,i0));
+#else
+ float temp[8];
+ _mm256_storeu_ps((float*)&temp, a);
+ return avxf(temp[i7], temp[i6], temp[i5], temp[i4], temp[i3], temp[i2], temp[i1], temp[i0]);
+#endif
+}
+
+template<int S0, int S1, int S2, int S3,int S4,int S5,int S6, int S7>
+ccl_device_inline const avxf set_sign_bit(const avxf &a)
+{
+ return a ^ avxf(S7 << 31, S6 << 31, S5 << 31, S4 << 31, S3 << 31,S2 << 31,S1 << 31,S0 << 31);
+}
+
+template<size_t S0, size_t S1, size_t S2, size_t S3,size_t S4,size_t S5,size_t S6, size_t S7>
+ccl_device_inline const avxf blend(const avxf &a, const avxf &b)
+{
+ return _mm256_blend_ps(a,b,S7 << 0 | S6 << 1 | S5 << 2 | S4 << 3 | S3 << 4 | S2 << 5 | S1 << 6 | S0 << 7);
+}
+
+template<size_t S0, size_t S1, size_t S2, size_t S3 >
+ccl_device_inline const avxf blend(const avxf &a, const avxf &b)
+{
+ return blend<S0,S1,S2,S3,S0,S1,S2,S3>(a,b);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Ternary Operators
+////////////////////////////////////////////////////////////////////////////////
+__forceinline const avxf madd (const avxf& a, const avxf& b, const avxf& c) {
+#ifdef __KERNEL_AVX2__
+ return _mm256_fmadd_ps(a,b,c);
+#else
+ return c+(a*b);
+#endif
+}
+
+__forceinline const avxf nmadd(const avxf& a, const avxf& b, const avxf& c) {
+#ifdef __KERNEL_AVX2__
+ return _mm256_fnmadd_ps(a, b, c);
+#else
+ return c-(a*b);
+#endif
+}
+#endif
+
+#ifndef _mm256_set_m128
+# define _mm256_set_m128(/* __m128 */ hi, /* __m128 */ lo) \
+ _mm256_insertf128_ps(_mm256_castps128_ps256(lo), (hi), 0x1)
+#endif
+
+#define _mm256_loadu2_m128(/* float const* */ hiaddr, /* float const* */ loaddr) \
+ _mm256_set_m128(_mm_loadu_ps(hiaddr), _mm_loadu_ps(loaddr))
+
+CCL_NAMESPACE_END
+
+#endif
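A hedged usage sketch of the new 8-wide type (only compiled when __KERNEL_AVX__ is defined; the values are illustrative and the snippet assumes the usual Cycles kernel headers are already included):

avxf a(8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f);  /* element 0 is the last argument */
avxf b(0.5f);                                            /* broadcast constructor */
avxf c = madd(a, b, avxf(1.0f));    /* c[i] = a[i]*0.5f + 1.0f; FMA on AVX2, mul+add otherwise */
avxf d = shuffle<3, 2, 1, 0>(c);    /* reverses each 128-bit lane via _mm256_shuffle_ps */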
diff --git a/intern/cycles/util/util_boundbox.h b/intern/cycles/util/util_boundbox.h
index 599222d..dfe4977 100644
--- a/intern/cycles/util/util_boundbox.h
+++ b/intern/cycles/util/util_boundbox.h
@@ -25,8 +25,6 @@
#include "util_transform.h"
#include "util_types.h"
-using namespace std;
-
CCL_NAMESPACE_BEGIN
/* 3D BoundBox */
diff --git a/intern/cycles/util/util_hash.h b/intern/cycles/util/util_hash.h
index 3ff2802..98c3a68 100644
--- a/intern/cycles/util/util_hash.h
+++ b/intern/cycles/util/util_hash.h
@@ -21,7 +21,7 @@
CCL_NAMESPACE_BEGIN
-static inline uint hash_int_2d(uint kx, uint ky)
+ccl_device_inline uint hash_int_2d(uint kx, uint ky)
{
#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
@@ -44,11 +44,12 @@ static inline uint hash_int_2d(uint kx, uint ky)
#undef rot
}
-static inline uint hash_int(uint k)
+ccl_device_inline uint hash_int(uint k)
{
return hash_int_2d(k, 0);
}
+#ifndef __KERNEL_GPU__
static inline uint hash_string(const char *str)
{
uint i = 0, c;
@@ -58,6 +59,7 @@ static inline uint hash_string(const char *str)
return i;
}
+#endif
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_image.h b/intern/cycles/util/util_image.h
index bb8a31c..c8efc55 100644
--- a/intern/cycles/util/util_image.h
+++ b/intern/cycles/util/util_image.h
@@ -21,11 +21,25 @@
#include <OpenImageIO/imageio.h>
+#include "util_vector.h"
+
CCL_NAMESPACE_BEGIN
OIIO_NAMESPACE_USING
+template<typename T>
+void util_image_resize_pixels(const vector<T>& input_pixels,
+ const size_t input_width,
+ const size_t input_height,
+ const size_t input_depth,
+ const size_t components,
+ vector<T> *output_pixels,
+ size_t *output_width,
+ size_t *output_height,
+ size_t *output_depth);
+
CCL_NAMESPACE_END
#endif /* __UTIL_IMAGE_H__ */
+#include "util_image_impl.h"
diff --git a/intern/cycles/util/util_image_impl.h b/intern/cycles/util/util_image_impl.h
new file mode 100644
index 0000000..73ecfda
--- /dev/null
+++ b/intern/cycles/util/util_image_impl.h
@@ -0,0 +1,168 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_IMAGE_IMPL_H__
+#define __UTIL_IMAGE_IMPL_H__
+
+#include "util_algorithm.h"
+#include "util_debug.h"
+#include "util_image.h"
+
+CCL_NAMESPACE_BEGIN
+
+namespace {
+
+template<typename T>
+const T *util_image_read(const vector<T>& pixels,
+ const size_t width,
+ const size_t height,
+ const size_t /*depth*/,
+ const size_t components,
+ const size_t x, const size_t y, const size_t z) {
+ const size_t index = ((size_t)z * (width * height) +
+ (size_t)y * width +
+ (size_t)x) * components;
+ return &pixels[index];
+}
+
+template<typename T>
+void util_image_downscale_sample(const vector<T>& pixels,
+ const size_t width,
+ const size_t height,
+ const size_t depth,
+ const size_t components,
+ const size_t kernel_size,
+ const float x,
+ const float y,
+ const float z,
+ T *result)
+{
+ assert(components <= 4);
+ const size_t ix = (size_t)x,
+ iy = (size_t)y,
+ iz = (size_t)z;
+ /* TODO(sergey): Support something smarter than a box filter. */
+ float accum[4] = {0};
+ size_t count = 0;
+ for(size_t dz = 0; dz < kernel_size; ++dz) {
+ for(size_t dy = 0; dy < kernel_size; ++dy) {
+ for(size_t dx = 0; dx < kernel_size; ++dx) {
+ const size_t nx = ix + dx,
+ ny = iy + dy,
+ nz = iz + dz;
+ if(nx >= width || ny >= height || nz >= depth) {
+ continue;
+ }
+ const T *pixel = util_image_read(pixels,
+ width, height, depth,
+ components,
+ nx, ny, nz);
+ for(size_t k = 0; k < components; ++k) {
+ accum[k] += pixel[k];
+ }
+ ++count;
+ }
+ }
+ }
+ const float inv_count = 1.0f / (float)count;
+ for(size_t k = 0; k < components; ++k) {
+ result[k] = T(accum[k] * inv_count);
+ }
+}
+
+template<typename T>
+void util_image_downscale_pixels(const vector<T>& input_pixels,
+ const size_t input_width,
+ const size_t input_height,
+ const size_t input_depth,
+ const size_t components,
+ const float inv_scale_factor,
+ const size_t output_width,
+ const size_t output_height,
+ const size_t output_depth,
+ vector<T> *output_pixels)
+{
+ const size_t kernel_size = (size_t)(inv_scale_factor + 0.5f);
+ for(size_t z = 0; z < output_depth; ++z) {
+ for(size_t y = 0; y < output_height; ++y) {
+ for(size_t x = 0; x < output_width; ++x) {
+ const float input_x = (float)x * inv_scale_factor,
+ input_y = (float)y * inv_scale_factor,
+ input_z = (float)z * inv_scale_factor;
+ const size_t output_index =
+ (z * output_width * output_height +
+ y * output_width + x) * components;
+ util_image_downscale_sample(input_pixels,
+ input_width, input_height, input_depth,
+ components,
+ kernel_size,
+ input_x, input_y, input_z,
+ &output_pixels->at(output_index));
+ }
+ }
+ }
+}
+
+} /* namespace */
+
+template<typename T>
+void util_image_resize_pixels(const vector<T>& input_pixels,
+ const size_t input_width,
+ const size_t input_height,
+ const size_t input_depth,
+ const size_t components,
+ const float scale_factor,
+ vector<T> *output_pixels,
+ size_t *output_width,
+ size_t *output_height,
+ size_t *output_depth)
+{
+ /* Early output for case when no scaling is applied. */
+ if(scale_factor == 1.0f) {
+ *output_width = input_width;
+ *output_height = input_height;
+ *output_depth = input_depth;
+ *output_pixels = input_pixels;
+ return;
+ }
+ /* First of all, we calculate the output image dimensions.
+ * We clamp them to at least 1 pixel so we do not generate a degenerate
+ * image.
+ */
+ *output_width = max((size_t)((float)input_width * scale_factor), (size_t)1);
+ *output_height = max((size_t)((float)input_height * scale_factor), (size_t)1);
+ *output_depth = max((size_t)((float)input_depth * scale_factor), (size_t)1);
+ /* Prepare pixel storage for the result. */
+ const size_t num_output_pixels = ((*output_width) *
+ (*output_height) *
+ (*output_depth)) * components;
+ output_pixels->resize(num_output_pixels);
+ if(scale_factor < 1.0f) {
+ const float inv_scale_factor = 1.0f / scale_factor;
+ util_image_downscale_pixels(input_pixels,
+ input_width, input_height, input_depth,
+ components,
+ inv_scale_factor,
+ *output_width, *output_height, *output_depth,
+ output_pixels);
+ } else {
+ /* TODO(sergey): Needs implementation. */
+ }
+}
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_IMAGE_IMPL_H__ */
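A hedged usage sketch of the resize helper defined above (note that the forward declaration added to util_image.h earlier in this patch omits the scale_factor parameter that this definition takes; the call below follows the definition):

vector<float> pixels(64*64*4, 1.0f);   /* 64x64 RGBA, single slice */
vector<float> scaled;
size_t out_w, out_h, out_d;
util_image_resize_pixels(pixels,
                         64, 64, 1,    /* input width, height, depth */
                         4,            /* components per pixel */
                         0.25f,        /* scale factor: box-downsample to 16x16 */
                         &scaled,
                         &out_w, &out_h, &out_d);
/* out_w == 16, out_h == 16, out_d == 1; upscaling (factor > 1.0f) is still a TODO. */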
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index 89a882d..2b81c8c 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -22,6 +22,11 @@
* Basic math functions on scalar and vector types. This header is used by
* both the kernel code when compiled as C++, and other C++ non-kernel code. */
+#ifndef __KERNEL_GPU__
+# include <cmath>
+#endif
+
+
#ifndef __KERNEL_OPENCL__
#include <float.h>
@@ -97,6 +102,9 @@ ccl_device_inline float fminf(float a, float b)
#ifndef __KERNEL_GPU__
+using std::isfinite;
+using std::isnan;
+
ccl_device_inline int abs(int x)
{
return (x > 0)? x: -x;
@@ -162,6 +170,11 @@ ccl_device_inline float max4(float a, float b, float c, float d)
return max(max(a, b), max(c, d));
}
+ccl_device_inline float max3(float3 a)
+{
+ return max(max(a.x, a.y), a.z);
+}
+
#ifndef __KERNEL_OPENCL__
ccl_device_inline int clamp(int a, int mn, int mx)
@@ -233,7 +246,7 @@ ccl_device_inline int mod(int x, int m)
#ifndef __KERNEL_OPENCL__
-ccl_device_inline bool is_zero(const float2 a)
+ccl_device_inline bool is_zero(const float2& a)
{
return (a.x == 0.0f && a.y == 0.0f);
}
@@ -242,7 +255,7 @@ ccl_device_inline bool is_zero(const float2 a)
#ifndef __KERNEL_OPENCL__
-ccl_device_inline float average(const float2 a)
+ccl_device_inline float average(const float2& a)
{
return (a.x + a.y)*(1.0f/2.0f);
}
@@ -251,58 +264,58 @@ ccl_device_inline float average(const float2 a)
#ifndef __KERNEL_OPENCL__
-ccl_device_inline float2 operator-(const float2 a)
+ccl_device_inline float2 operator-(const float2& a)
{
return make_float2(-a.x, -a.y);
}
-ccl_device_inline float2 operator*(const float2 a, const float2 b)
+ccl_device_inline float2 operator*(const float2& a, const float2& b)
{
return make_float2(a.x*b.x, a.y*b.y);
}
-ccl_device_inline float2 operator*(const float2 a, float f)
+ccl_device_inline float2 operator*(const float2& a, float f)
{
return make_float2(a.x*f, a.y*f);
}
-ccl_device_inline float2 operator*(float f, const float2 a)
+ccl_device_inline float2 operator*(float f, const float2& a)
{
return make_float2(a.x*f, a.y*f);
}
-ccl_device_inline float2 operator/(float f, const float2 a)
+ccl_device_inline float2 operator/(float f, const float2& a)
{
return make_float2(f/a.x, f/a.y);
}
-ccl_device_inline float2 operator/(const float2 a, float f)
+ccl_device_inline float2 operator/(const float2& a, float f)
{
float invf = 1.0f/f;
return make_float2(a.x*invf, a.y*invf);
}
-ccl_device_inline float2 operator/(const float2 a, const float2 b)
+ccl_device_inline float2 operator/(const float2& a, const float2& b)
{
return make_float2(a.x/b.x, a.y/b.y);
}
-ccl_device_inline float2 operator+(const float2 a, const float2 b)
+ccl_device_inline float2 operator+(const float2& a, const float2& b)
{
return make_float2(a.x+b.x, a.y+b.y);
}
-ccl_device_inline float2 operator-(const float2 a, const float2 b)
+ccl_device_inline float2 operator-(const float2& a, const float2& b)
{
return make_float2(a.x-b.x, a.y-b.y);
}
-ccl_device_inline float2 operator+=(float2& a, const float2 b)
+ccl_device_inline float2 operator+=(float2& a, const float2& b)
{
return a = a + b;
}
-ccl_device_inline float2 operator*=(float2& a, const float2 b)
+ccl_device_inline float2 operator*=(float2& a, const float2& b)
{
return a = a * b;
}
@@ -312,7 +325,7 @@ ccl_device_inline float2 operator*=(float2& a, float f)
return a = a * f;
}
-ccl_device_inline float2 operator/=(float2& a, const float2 b)
+ccl_device_inline float2 operator/=(float2& a, const float2& b)
{
return a = a / b;
}
@@ -324,12 +337,12 @@ ccl_device_inline float2 operator/=(float2& a, float f)
}
-ccl_device_inline float dot(const float2 a, const float2 b)
+ccl_device_inline float dot(const float2& a, const float2& b)
{
return a.x*b.x + a.y*b.y;
}
-ccl_device_inline float cross(const float2 a, const float2 b)
+ccl_device_inline float cross(const float2& a, const float2& b)
{
return (a.x*b.y - a.y*b.x);
}
@@ -343,59 +356,59 @@ ccl_device_inline bool operator==(const int2 a, const int2 b)
return (a.x == b.x && a.y == b.y);
}
-ccl_device_inline float len(const float2 a)
+ccl_device_inline float len(const float2& a)
{
return sqrtf(dot(a, a));
}
-ccl_device_inline float2 normalize(const float2 a)
+ccl_device_inline float2 normalize(const float2& a)
{
return a/len(a);
}
-ccl_device_inline float2 normalize_len(const float2 a, float *t)
+ccl_device_inline float2 normalize_len(const float2& a, float *t)
{
*t = len(a);
return a/(*t);
}
-ccl_device_inline float2 safe_normalize(const float2 a)
+ccl_device_inline float2 safe_normalize(const float2& a)
{
float t = len(a);
return (t != 0.0f)? a/t: a;
}
-ccl_device_inline bool operator==(const float2 a, const float2 b)
+ccl_device_inline bool operator==(const float2& a, const float2& b)
{
return (a.x == b.x && a.y == b.y);
}
-ccl_device_inline bool operator!=(const float2 a, const float2 b)
+ccl_device_inline bool operator!=(const float2& a, const float2& b)
{
return !(a == b);
}
-ccl_device_inline float2 min(float2 a, float2 b)
+ccl_device_inline float2 min(const float2& a, const float2& b)
{
return make_float2(min(a.x, b.x), min(a.y, b.y));
}
-ccl_device_inline float2 max(float2 a, float2 b)
+ccl_device_inline float2 max(const float2& a, const float2& b)
{
return make_float2(max(a.x, b.x), max(a.y, b.y));
}
-ccl_device_inline float2 clamp(float2 a, float2 mn, float2 mx)
+ccl_device_inline float2 clamp(const float2& a, const float2& mn, const float2& mx)
{
return min(max(a, mn), mx);
}
-ccl_device_inline float2 fabs(float2 a)
+ccl_device_inline float2 fabs(const float2& a)
{
return make_float2(fabsf(a.x), fabsf(a.y));
}
-ccl_device_inline float2 as_float2(const float4 a)
+ccl_device_inline float2 as_float2(const float4& a)
{
return make_float2(a.x, a.y);
}
@@ -413,7 +426,7 @@ ccl_device_inline void print_float2(const char *label, const float2& a)
#ifndef __KERNEL_OPENCL__
-ccl_device_inline float2 interp(float2 a, float2 b, float t)
+ccl_device_inline float2 interp(const float2& a, const float2& b, float t)
{
return a + t*(b - a);
}
@@ -424,58 +437,95 @@ ccl_device_inline float2 interp(float2 a, float2 b, float t)
#ifndef __KERNEL_OPENCL__
-ccl_device_inline float3 operator-(const float3 a)
+ccl_device_inline float3 operator-(const float3& a)
{
+#ifdef __KERNEL_SSE__
+ return float3(_mm_xor_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000))));
+#else
return make_float3(-a.x, -a.y, -a.z);
+#endif
}
-ccl_device_inline float3 operator*(const float3 a, const float3 b)
+ccl_device_inline float3 operator*(const float3& a, const float3& b)
{
+#ifdef __KERNEL_SSE__
+ return float3(_mm_mul_ps(a.m128,b.m128));
+#else
return make_float3(a.x*b.x, a.y*b.y, a.z*b.z);
+#endif
}
-ccl_device_inline float3 operator*(const float3 a, float f)
+ccl_device_inline float3 operator*(const float3& a, const float f)
{
+#ifdef __KERNEL_SSE__
+ return float3(_mm_mul_ps(a.m128,_mm_set1_ps(f)));
+#else
return make_float3(a.x*f, a.y*f, a.z*f);
+#endif
}
-ccl_device_inline float3 operator*(float f, const float3 a)
+ccl_device_inline float3 operator*(const float f, const float3& a)
{
+ /* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */
+#if defined(__KERNEL_SSE__) && 0
+ return float3(_mm_mul_ps(_mm_set1_ps(f), a.m128));
+#else
return make_float3(a.x*f, a.y*f, a.z*f);
+#endif
}
-ccl_device_inline float3 operator/(float f, const float3 a)
+ccl_device_inline float3 operator/(const float f, const float3& a)
{
- return make_float3(f/a.x, f/a.y, f/a.z);
+ /* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */
+#if defined(__KERNEL_SSE__) && 0
+ __m128 rc = _mm_rcp_ps(a.m128);
+ return float3(_mm_mul_ps(_mm_set1_ps(f),rc));
+#else
+ return make_float3(f / a.x, f / a.y, f / a.z);
+#endif
}
-ccl_device_inline float3 operator/(const float3 a, float f)
+ccl_device_inline float3 operator/(const float3& a, const float f)
{
float invf = 1.0f/f;
- return make_float3(a.x*invf, a.y*invf, a.z*invf);
+ return a * invf;
}
-ccl_device_inline float3 operator/(const float3 a, const float3 b)
+ccl_device_inline float3 operator/(const float3& a, const float3& b)
{
- return make_float3(a.x/b.x, a.y/b.y, a.z/b.z);
+ /* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */
+#if defined(__KERNEL_SSE__) && 0
+ __m128 rc = _mm_rcp_ps(b.m128);
+ return float3(_mm_mul_ps(a, rc));
+#else
+ return make_float3(a.x / b.x, a.y / b.y, a.z / b.z);
+#endif
}
-ccl_device_inline float3 operator+(const float3 a, const float3 b)
+ccl_device_inline float3 operator+(const float3& a, const float3& b)
{
- return make_float3(a.x+b.x, a.y+b.y, a.z+b.z);
+#ifdef __KERNEL_SSE__
+ return float3(_mm_add_ps(a.m128, b.m128));
+#else
+ return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
+#endif
}
-ccl_device_inline float3 operator-(const float3 a, const float3 b)
+ccl_device_inline float3 operator-(const float3& a, const float3& b)
{
- return make_float3(a.x-b.x, a.y-b.y, a.z-b.z);
+#ifdef __KERNEL_SSE__
+ return float3(_mm_sub_ps(a.m128, b.m128));
+#else
+ return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
+#endif
}
-ccl_device_inline float3 operator+=(float3& a, const float3 b)
+ccl_device_inline float3 operator+=(float3& a, const float3& b)
{
return a = a + b;
}
-ccl_device_inline float3 operator*=(float3& a, const float3 b)
+ccl_device_inline float3 operator*=(float3& a, const float3& b)
{
return a = a * b;
}
@@ -485,7 +535,7 @@ ccl_device_inline float3 operator*=(float3& a, float f)
return a = a * f;
}
-ccl_device_inline float3 operator/=(float3& a, const float3 b)
+ccl_device_inline float3 operator/=(float3& a, const float3& b)
{
return a = a / b;
}
@@ -496,7 +546,7 @@ ccl_device_inline float3 operator/=(float3& a, float f)
return a = a * invf;
}
-ccl_device_inline float dot(const float3 a, const float3 b)
+ccl_device_inline float dot(const float3& a, const float3& b)
{
#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
return _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7F));
@@ -505,7 +555,16 @@ ccl_device_inline float dot(const float3 a, const float3 b)
#endif
}
-ccl_device_inline float dot(const float4 a, const float4 b)
+ccl_device_inline float dot_xy(const float3& a, const float3& b)
+{
+#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
+ return _mm_cvtss_f32(_mm_hadd_ps(_mm_mul_ps(a,b),b));
+#else
+ return a.x*b.x + a.y*b.y;
+#endif
+}
+
+ccl_device_inline float dot(const float4& a, const float4& b)
{
#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
return _mm_cvtss_f32(_mm_dp_ps(a, b, 0xFF));
@@ -514,7 +573,7 @@ ccl_device_inline float dot(const float4 a, const float4 b)
#endif
}
-ccl_device_inline float3 cross(const float3 a, const float3 b)
+ccl_device_inline float3 cross(const float3& a, const float3& b)
{
float3 r = make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x);
return r;
@@ -538,12 +597,12 @@ ccl_device_inline float len_squared(const float3 a)
#ifndef __KERNEL_OPENCL__
-ccl_device_inline float len_squared(const float4 a)
+ccl_device_inline float len_squared(const float4& a)
{
return dot(a, a);
}
-ccl_device_inline float3 normalize(const float3 a)
+ccl_device_inline float3 normalize(const float3& a)
{
#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
__m128 norm = _mm_sqrt_ps(_mm_dp_ps(a.m128, a.m128, 0x7F));
@@ -563,13 +622,14 @@ ccl_device_inline float3 saturate3(float3 a)
ccl_device_inline float3 normalize_len(const float3 a, float *t)
{
*t = len(a);
- return a/(*t);
+ float x = 1.0f / *t;
+ return a*x;
}
ccl_device_inline float3 safe_normalize(const float3 a)
{
float t = len(a);
- return (t != 0.0f)? a/t: a;
+ return (t != 0.0f)? a * (1.0f/t) : a;
}
ccl_device_inline float3 safe_normalize_len(const float3 a, float *t)
@@ -580,7 +640,7 @@ ccl_device_inline float3 safe_normalize_len(const float3 a, float *t)
#ifndef __KERNEL_OPENCL__
-ccl_device_inline bool operator==(const float3 a, const float3 b)
+ccl_device_inline bool operator==(const float3& a, const float3& b)
{
#ifdef __KERNEL_SSE__
return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 7) == 7;
@@ -589,12 +649,12 @@ ccl_device_inline bool operator==(const float3 a, const float3 b)
#endif
}
-ccl_device_inline bool operator!=(const float3 a, const float3 b)
+ccl_device_inline bool operator!=(const float3& a, const float3& b)
{
return !(a == b);
}
-ccl_device_inline float3 min(float3 a, float3 b)
+ccl_device_inline float3 min(const float3& a, const float3& b)
{
#ifdef __KERNEL_SSE__
return _mm_min_ps(a.m128, b.m128);
@@ -603,7 +663,7 @@ ccl_device_inline float3 min(float3 a, float3 b)
#endif
}
-ccl_device_inline float3 max(float3 a, float3 b)
+ccl_device_inline float3 max(const float3& a, const float3& b)
{
#ifdef __KERNEL_SSE__
return _mm_max_ps(a.m128, b.m128);
@@ -612,12 +672,12 @@ ccl_device_inline float3 max(float3 a, float3 b)
#endif
}
-ccl_device_inline float3 clamp(float3 a, float3 mn, float3 mx)
+ccl_device_inline float3 clamp(const float3& a, const float3& mn, const float3& mx)
{
return min(max(a, mn), mx);
}
-ccl_device_inline float3 fabs(float3 a)
+ccl_device_inline float3 fabs(const float3& a)
{
#ifdef __KERNEL_SSE__
__m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
@@ -670,7 +730,7 @@ ccl_device_inline float3 interp(float3 a, float3 b, float t)
#ifndef __KERNEL_OPENCL__
-ccl_device_inline float3 mix(float3 a, float3 b, float t)
+ccl_device_inline float3 mix(const float3& a, const float3& b, float t)
{
return a + t*(b - a);
}
@@ -754,7 +814,7 @@ ccl_device_inline float4 operator*(const float4& a, const float4& b)
ccl_device_inline float4 operator*(const float4& a, float f)
{
-#ifdef __KERNEL_SSE__
+#if defined(__KERNEL_SSE__)
return a * make_float4(f);
#else
return make_float4(a.x*f, a.y*f, a.z*f, a.w*f);
@@ -833,7 +893,7 @@ ccl_device_inline int4 operator<(const float4& a, const float4& b)
#endif
}
-ccl_device_inline int4 operator>=(float4 a, float4 b)
+ccl_device_inline int4 operator>=(const float4& a, const float4& b)
{
#ifdef __KERNEL_SSE__
return _mm_cvtps_epi32(_mm_cmpge_ps(a.m128, b.m128)); /* todo: avoid cvt */
@@ -851,7 +911,7 @@ ccl_device_inline int4 operator<=(const float4& a, const float4& b)
#endif
}
-ccl_device_inline bool operator==(const float4 a, const float4 b)
+ccl_device_inline bool operator==(const float4& a, const float4& b)
{
#ifdef __KERNEL_SSE__
return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 15) == 15;
@@ -893,23 +953,23 @@ ccl_device_inline float average(const float4& a)
return reduce_add(a) * 0.25f;
}
-ccl_device_inline float len(const float4 a)
+ccl_device_inline float len(const float4& a)
{
return sqrtf(dot(a, a));
}
-ccl_device_inline float4 normalize(const float4 a)
+ccl_device_inline float4 normalize(const float4& a)
{
return a/len(a);
}
-ccl_device_inline float4 safe_normalize(const float4 a)
+ccl_device_inline float4 safe_normalize(const float4& a)
{
float t = len(a);
return (t != 0.0f)? a/t: a;
}
-ccl_device_inline float4 min(float4 a, float4 b)
+ccl_device_inline float4 min(const float4& a, const float4& b)
{
#ifdef __KERNEL_SSE__
return _mm_min_ps(a.m128, b.m128);
@@ -918,7 +978,7 @@ ccl_device_inline float4 min(float4 a, float4 b)
#endif
}
-ccl_device_inline float4 max(float4 a, float4 b)
+ccl_device_inline float4 max(const float4& a, const float4& b)
{
#ifdef __KERNEL_SSE__
return _mm_max_ps(a.m128, b.m128);
@@ -1181,6 +1241,20 @@ ccl_device_inline float __uint_as_float(uint i)
return u.f;
}
+/* Versions of functions which are safe for fast math. */
+ccl_device_inline bool isnan_safe(float f)
+{
+ unsigned int x = __float_as_uint(f);
+ return (x << 1) > 0xff000000u;
+}
+
+ccl_device_inline bool isfinite_safe(float f)
+{
+ /* By IEEE 754 rule, 2*Inf equals Inf */
+ unsigned int x = __float_as_uint(f);
+ return (f == f) && (x == 0 || (f != 2.0f*f));
+}
+
/* Interpolation */
template<class A, class B> A lerp(const A& a, const A& b, const B& t)
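
A note on the fast-math helpers added in the hunk above: isnan_safe() classifies NaN from the raw bit pattern and isfinite_safe() leans on the fact that 2*Inf stays Inf, so neither depends on the standard isnan()/isfinite() calls that aggressive math flags tend to optimize away. The following is a standalone restatement, not the kernel code; float_as_uint() is a local stand-in for __float_as_uint().

    #include <cassert>
    #include <cstring>
    #include <limits>

    /* Local stand-in for the kernel's __float_as_uint(). */
    static unsigned int float_as_uint(float f)
    {
        unsigned int u;
        std::memcpy(&u, &f, sizeof(u));
        return u;
    }

    static bool isnan_safe(float f)
    {
        /* Drop the sign bit; anything above 0xff000000 has an all-ones
         * exponent plus a non-zero mantissa bit, i.e. a NaN. */
        return (float_as_uint(f) << 1) > 0xff000000u;
    }

    static bool isfinite_safe(float f)
    {
        /* Inf satisfies f == 2*f, NaN fails f == f, zero is special-cased. */
        unsigned int x = float_as_uint(f);
        return (f == f) && (x == 0 || (f != 2.0f * f));
    }

    int main()
    {
        const float inf = std::numeric_limits<float>::infinity();
        const float nan = std::numeric_limits<float>::quiet_NaN();
        assert(isnan_safe(nan) && !isnan_safe(inf) && !isnan_safe(1.0f));
        assert(!isfinite_safe(nan) && !isfinite_safe(inf));
        assert(isfinite_safe(0.0f) && isfinite_safe(1.0f));
        return 0;
    }
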
@@ -1190,7 +1264,7 @@ template<class A, class B> A lerp(const A& a, const A& b, const B& t)
/* Triangle */
-ccl_device_inline float triangle_area(const float3 v1, const float3 v2, const float3 v3)
+ccl_device_inline float triangle_area(const float3& v1, const float3& v2, const float3& v3)
{
return len(cross(v3 - v2, v1 - v2))*0.5f;
}
@@ -1529,7 +1603,7 @@ ccl_device_inline bool ray_triangle_intersect_uv(
ccl_device bool ray_quad_intersect(float3 ray_P, float3 ray_D, float ray_mint, float ray_maxt,
float3 quad_P, float3 quad_u, float3 quad_v, float3 quad_n,
- float3 *isect_P, float *isect_t)
+ float3 *isect_P, float *isect_t, float *isect_u, float *isect_v)
{
float t = -(dot(ray_P, quad_n) - dot(quad_P, quad_n)) / dot(ray_D, quad_n);
if(t < ray_mint || t > ray_maxt)
@@ -1537,13 +1611,19 @@ ccl_device bool ray_quad_intersect(float3 ray_P, float3 ray_D, float ray_mint, f
float3 hit = ray_P + t*ray_D;
float3 inplane = hit - quad_P;
- if(fabsf(dot(inplane, quad_u) / dot(quad_u, quad_u)) > 0.5f)
+
+ float u = dot(inplane, quad_u) / dot(quad_u, quad_u) + 0.5f;
+ if(u < 0.0f || u > 1.0f)
return false;
- if(fabsf(dot(inplane, quad_v) / dot(quad_v, quad_v)) > 0.5f)
+
+ float v = dot(inplane, quad_v) / dot(quad_v, quad_v) + 0.5f;
+ if(v < 0.0f || v > 1.0f)
return false;
if(isect_P) *isect_P = hit;
if(isect_t) *isect_t = t;
+ if(isect_u) *isect_u = u;
+ if(isect_v) *isect_v = v;
return true;
}
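
ray_quad_intersect() now reports parametric coordinates: instead of rejecting hits whose projection onto quad_u/quad_v exceeds 0.5 in magnitude, it shifts the projection by 0.5 so the quad spans [0, 1] in u and v, and the values can be handed back through the new isect_u/isect_v outputs. A self-contained sketch of the mapping; vec3 and dot() below are local stand-ins for the kernel's float3 utilities.

    #include <cstdio>

    struct vec3 { float x, y, z; };
    static float dot(const vec3 &a, const vec3 &b) { return a.x*b.x + a.y*b.y + a.z*b.z; }

    int main()
    {
        vec3 quad_u  = {2.0f, 0.0f, 0.0f};   /* quad edge direction (U) */
        vec3 inplane = {0.5f, 0.0f, 0.0f};   /* hit point relative to quad center */

        /* Old test: reject when |proj| > 0.5. The new code keeps the projection,
         * shifts it by 0.5 so the center maps to u = 0.5 and the edges map to
         * 0 and 1, then rejects anything outside [0, 1]. */
        float u = dot(inplane, quad_u) / dot(quad_u, quad_u) + 0.5f;
        printf("u = %f (inside: %d)\n", u, u >= 0.0f && u <= 1.0f);  /* u = 0.75 */
        return 0;
    }
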
@@ -1584,6 +1664,14 @@ ccl_device_inline float2 map_to_sphere(const float3 co)
ccl_device_inline int util_max_axis(float3 vec)
{
+#ifdef __KERNEL_SSE__
+ __m128 a = shuffle<0,0,1,1>(vec.m128);
+ __m128 b = shuffle<1,2,2,1>(vec.m128);
+ __m128 c = _mm_cmpgt_ps(a, b);
+ int mask = _mm_movemask_ps(c) & 0x7;
+ static const char tab[8] = {2, 2, 2, 0, 1, 2, 1, 0};
+ return tab[mask];
+#else
if(vec.x > vec.y) {
if(vec.x > vec.z)
return 0;
@@ -1596,6 +1684,7 @@ ccl_device_inline int util_max_axis(float3 vec)
else
return 2;
}
+#endif
}
CCL_NAMESPACE_END
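
For reference, the new SSE branch of util_max_axis() packs the three comparisons x>y, x>z, y>z into a 3-bit movemask and resolves it through an 8-entry table, avoiding branches. A scalar sketch of the same table, checked against the branching fallback; plain C++, no SSE required.

    #include <cassert>

    static int max_axis_table(float x, float y, float z)
    {
        /* Bit 0 = (x > y), bit 1 = (x > z), bit 2 = (y > z); impossible
         * combinations default to 2 and are never hit. */
        static const char tab[8] = {2, 2, 2, 0, 1, 2, 1, 0};
        const int mask = (x > y) | ((x > z) << 1) | ((y > z) << 2);
        return tab[mask];
    }

    static int max_axis_branches(float x, float y, float z)
    {
        if (x > y) return (x > z) ? 0 : 2;
        return (y > z) ? 1 : 2;
    }

    int main()
    {
        /* Exhaustive check over a small grid, including equal components. */
        const float vals[3] = {-1.0f, 0.0f, 2.5f};
        for (float x : vals) for (float y : vals) for (float z : vals)
            assert(max_axis_table(x, y, z) == max_axis_branches(x, y, z));
        return 0;
    }
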
diff --git a/intern/cycles/util/util_path.cpp b/intern/cycles/util/util_path.cpp
index f6fba03..5df262f 100644
--- a/intern/cycles/util/util_path.cpp
+++ b/intern/cycles/util/util_path.cpp
@@ -36,6 +36,9 @@ OIIO_NAMESPACE_USING
#else
# define DIR_SEP '/'
# include <dirent.h>
+# include <pwd.h>
+# include <unistd.h>
+# include <sys/types.h>
#endif
#ifdef HAVE_SHLWAPI_H
@@ -63,6 +66,7 @@ typedef struct stat path_stat_t;
static string cached_path = "";
static string cached_user_path = "";
+static string cached_xdg_cache_path = "";
namespace {
@@ -331,6 +335,23 @@ static char *path_specials(const string& sub)
return NULL;
}
+#if defined(__linux__) || defined(__APPLE__)
+static string path_xdg_cache_get()
+{
+ const char *home = getenv("XDG_CACHE_HOME");
+ if(home) {
+ return string(home);
+ }
+ else {
+ home = getenv("HOME");
+ if(home == NULL) {
+ home = getpwuid(getuid())->pw_dir;
+ }
+ return path_join(string(home), ".cache");
+ }
+}
+#endif
+
void path_init(const string& path, const string& user_path)
{
cached_path = path;
@@ -364,6 +385,24 @@ string path_user_get(const string& sub)
return path_join(cached_user_path, sub);
}
+string path_cache_get(const string& sub)
+{
+#if defined(__linux__) || defined(__APPLE__)
+ if(cached_xdg_cache_path == "") {
+ cached_xdg_cache_path = path_xdg_cache_get();
+ }
+ string result = path_join(cached_xdg_cache_path, "cycles");
+ return path_join(result, sub);
+#else
+ /* TODO(sergey): What should that be on Windows? */
+ return path_user_get(path_join("cache", sub));
+#endif
+}
+
+#if defined(__linux__) || defined(__APPLE__)
+string path_xdg_home_get(const string& sub = "");
+#endif
+
string path_filename(const string& path)
{
size_t index = find_last_slash(path);
@@ -718,9 +757,9 @@ uint64_t path_modified_time(const string& path)
{
path_stat_t st;
if(path_stat(path, &st) != 0) {
- return st.st_mtime;
+ return 0;
}
- return 0;
+ return st.st_mtime;
}
bool path_remove(const string& path)
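
The new path_cache_get() resolves the per-user cache root through the XDG lookup added above: XDG_CACHE_HOME if set, otherwise $HOME/.cache, with the passwd database as a last resort when HOME is unset. A minimal POSIX-only sketch of that lookup; std::string concatenation stands in for path_join().

    #include <cstdio>
    #include <cstdlib>
    #include <string>
    #include <pwd.h>
    #include <unistd.h>

    static std::string xdg_cache_dir()
    {
        /* 1. Explicit override via XDG_CACHE_HOME. */
        if (const char *xdg = getenv("XDG_CACHE_HOME"))
            return xdg;
        /* 2. Fall back to $HOME/.cache, per the XDG base directory spec. */
        const char *home = getenv("HOME");
        if (home == NULL)
            home = getpwuid(getuid())->pw_dir;  /* 3. Last resort: passwd entry. */
        return std::string(home) + "/.cache";
    }

    int main()
    {
        printf("%s\n", xdg_cache_dir().c_str());  /* e.g. /home/user/.cache */
        return 0;
    }
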
diff --git a/intern/cycles/util/util_path.h b/intern/cycles/util/util_path.h
index 1ff76b6..70dbb5a 100644
--- a/intern/cycles/util/util_path.h
+++ b/intern/cycles/util/util_path.h
@@ -35,6 +35,7 @@ CCL_NAMESPACE_BEGIN
void path_init(const string& path = "", const string& user_path = "");
string path_get(const string& sub = "");
string path_user_get(const string& sub = "");
+string path_cache_get(const string& sub = "");
/* path string manipulation */
string path_filename(const string& path);
diff --git a/intern/cycles/util/util_progress.h b/intern/cycles/util/util_progress.h
index 4ae1d61..1421505 100644
--- a/intern/cycles/util/util_progress.h
+++ b/intern/cycles/util/util_progress.h
@@ -34,12 +34,12 @@ class Progress {
public:
Progress()
{
- tile = 0;
- sample = 0;
+ pixel_samples = 0;
+ total_pixel_samples = 0;
+ current_tile_sample = 0;
+ finished_tiles = 0;
start_time = time_dt();
- total_time = 0.0;
- render_time = 0.0;
- tile_time = 0.0;
+ render_start_time = time_dt();
status = "Initializing";
substatus = "";
sync_status = "";
@@ -62,22 +62,22 @@ public:
thread_scoped_lock lock(progress.progress_mutex);
progress.get_status(status, substatus);
- progress.get_tile(tile, total_time, render_time, tile_time);
- sample = progress.get_sample();
+ pixel_samples = progress.pixel_samples;
+ total_pixel_samples = progress.total_pixel_samples;
+ current_tile_sample = progress.get_current_sample();
return *this;
}
void reset()
{
- tile = 0;
- sample = 0;
+ pixel_samples = 0;
+ total_pixel_samples = 0;
+ current_tile_sample = 0;
+ finished_tiles = 0;
start_time = time_dt();
render_start_time = time_dt();
- total_time = 0.0;
- render_time = 0.0;
- tile_time = 0.0;
status = "Initializing";
substatus = "";
sync_status = "";
@@ -139,69 +139,93 @@ public:
/* tile and timing information */
- void set_start_time(double start_time_)
+ void set_start_time()
{
thread_scoped_lock lock(progress_mutex);
- start_time = start_time_;
+ start_time = time_dt();
}
- void set_render_start_time(double render_start_time_)
+ void set_render_start_time()
{
thread_scoped_lock lock(progress_mutex);
- render_start_time = render_start_time_;
+ render_start_time = time_dt();
}
- void set_tile(int tile_, double tile_time_)
+ void add_skip_time(const scoped_timer &start_timer, bool only_render)
{
- thread_scoped_lock lock(progress_mutex);
+ double skip_time = time_dt() - start_timer.get_start();
- tile = tile_;
- total_time = time_dt() - start_time;
- render_time = time_dt() - render_start_time;
- tile_time = tile_time_;
+ render_start_time += skip_time;
+ if(!only_render) {
+ start_time += skip_time;
+ }
}
- void get_tile(int& tile_, double& total_time_, double& render_time_, double& tile_time_)
+ void get_time(double& total_time_, double& render_time_)
{
thread_scoped_lock lock(progress_mutex);
- tile_ = tile;
- total_time_ = (total_time > 0.0)? total_time: 0.0;
- render_time_ = (render_time > 0.0)? render_time: 0.0;
- tile_time_ = tile_time;
+ total_time_ = time_dt() - start_time;
+ render_time_ = time_dt() - render_start_time;
}
- void get_time(double& total_time_, double& render_time_)
+ void reset_sample()
{
- total_time_ = (total_time > 0.0)? total_time: 0.0;
- render_time_ = (render_time > 0.0)? render_time: 0.0;
+ thread_scoped_lock lock(progress_mutex);
+
+ pixel_samples = 0;
+ current_tile_sample = 0;
+ finished_tiles = 0;
}
- void reset_sample()
+ void set_total_pixel_samples(uint64_t total_pixel_samples_)
{
thread_scoped_lock lock(progress_mutex);
- sample = 0;
+ total_pixel_samples = total_pixel_samples_;
}
- void increment_sample()
+ float get_progress()
+ {
+ if(total_pixel_samples > 0) {
+ return ((float) pixel_samples) / total_pixel_samples;
+ }
+ return 0.0f;
+ }
+
+ void add_samples(uint64_t pixel_samples_, int tile_sample)
{
thread_scoped_lock lock(progress_mutex);
- sample++;
+ pixel_samples += pixel_samples_;
+ current_tile_sample = tile_sample;
}
- void increment_sample_update()
+ void add_samples_update(uint64_t pixel_samples_, int tile_sample)
{
- increment_sample();
+ add_samples(pixel_samples_, tile_sample);
set_update();
}
- int get_sample()
+ void add_finished_tile()
+ {
+ thread_scoped_lock lock(progress_mutex);
+
+ finished_tiles++;
+ }
+
+ int get_current_sample()
+ {
+ /* Note that the value here always belongs to the last tile that updated,
+ * so it's only useful if there is only one active tile. */
+ return current_tile_sample;
+ }
+
+ int get_finished_tiles()
{
- return sample;
+ return finished_tiles;
}
/* status messages */
@@ -212,8 +236,6 @@ public:
thread_scoped_lock lock(progress_mutex);
status = status_;
substatus = substatus_;
- total_time = time_dt() - start_time;
- render_time = time_dt() - render_start_time;
}
set_update();
@@ -224,8 +246,6 @@ public:
{
thread_scoped_lock lock(progress_mutex);
substatus = substatus_;
- total_time = time_dt() - start_time;
- render_time = time_dt() - render_start_time;
}
set_update();
@@ -237,8 +257,6 @@ public:
thread_scoped_lock lock(progress_mutex);
sync_status = status_;
sync_substatus = substatus_;
- total_time = time_dt() - start_time;
- render_time = time_dt() - render_start_time;
}
set_update();
@@ -250,8 +268,6 @@ public:
{
thread_scoped_lock lock(progress_mutex);
sync_substatus = substatus_;
- total_time = time_dt() - start_time;
- render_time = time_dt() - render_start_time;
}
set_update();
@@ -292,12 +308,19 @@ protected:
function<void(void)> update_cb;
function<void(void)> cancel_cb;
- int tile; /* counter for rendered tiles */
- int sample; /* counter of rendered samples, global for all tiles */
+ /* pixel_samples counts how many samples have been rendered over all pixels, not just per pixel.
+ * This makes the progress estimate more accurate when tiles with different sizes are used.
+ *
+ * total_pixel_samples is the total amount of pixel samples that will be rendered. */
+ uint64_t pixel_samples, total_pixel_samples;
+ /* Stores the current sample count of the last tile that called the update function.
+ * It's used to display the sample count if only one tile is active. */
+ int current_tile_sample;
+ /* Stores the number of tiles that are already finished.
+ * Used to determine whether all but the last tile are finished rendering, in which case the current_tile_sample is displayed. */
+ int finished_tiles;
double start_time, render_start_time;
- double total_time, render_time;
- double tile_time;
string status;
string substatus;
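
The Progress rework above replaces the per-tile sample counter with a global pixel-sample counter: the session sets total_pixel_samples once (roughly resolution times sample count) and every tile update adds the pixel samples it just finished, which keeps get_progress() accurate even when tiles have different sizes. The numbers below are purely illustrative.

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        const uint64_t width = 1920, height = 1080, num_samples = 128;
        const uint64_t total_pixel_samples = width * height * num_samples;

        /* One 256x256 tile that has rendered 32 of its samples so far. */
        uint64_t pixel_samples = 0;
        pixel_samples += uint64_t(256) * 256 * 32;   /* add_samples(256*256*32, 32) */

        const float progress = float(pixel_samples) / float(total_pixel_samples);
        printf("progress: %.4f\n", progress);  /* ~0.0079, independent of tile sizes */
        return 0;
    }
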
diff --git a/intern/cycles/util/util_simd.h b/intern/cycles/util/util_simd.h
index 36da155..756bd15 100644
--- a/intern/cycles/util/util_simd.h
+++ b/intern/cycles/util/util_simd.h
@@ -71,7 +71,7 @@ __forceinline operator int ( ) const { return std::numeric_limits<
#define _lzcnt_u64 __lzcnt64
#endif
-#if defined(_WIN32) && !defined(__MINGW32__)
+#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__clang__)
__forceinline int __popcnt(int in) {
return _mm_popcnt_u32(in);
@@ -229,7 +229,7 @@ __forceinline int __btr(int v, int i) {
int r = 0; asm ("btr %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r;
}
-#if defined(__KERNEL_64_BIT__) || defined(__APPLE__)
+#if (defined(__KERNEL_64_BIT__) || defined(__APPLE__)) && !(defined(__ILP32__) && defined(__x86_64__))
__forceinline size_t __bsf(size_t v) {
size_t r = 0; asm ("bsf %1,%0" : "=r"(r) : "r"(v)); return r;
}
@@ -271,7 +271,7 @@ __forceinline unsigned int bitscan(unsigned int v) {
#endif
}
-#if defined(__KERNEL_64_BIT__) || defined(__APPLE__)
+#if (defined(__KERNEL_64_BIT__) || defined(__APPLE__)) && !(defined(__ILP32__) && defined(__x86_64__))
__forceinline size_t bitscan(size_t v) {
#if defined(__KERNEL_AVX2__)
#if defined(__KERNEL_64_BIT__)
@@ -313,7 +313,7 @@ __forceinline unsigned int __bscf(unsigned int& v)
return i;
}
-#if defined(__KERNEL_64_BIT__) || defined(__APPLE__)
+#if (defined(__KERNEL_64_BIT__) || defined(__APPLE__)) && !(defined(__ILP32__) && defined(__x86_64__))
__forceinline size_t __bscf(size_t& v)
{
size_t i = bitscan(v);
@@ -455,6 +455,7 @@ CCL_NAMESPACE_END
#include "util_sseb.h"
#include "util_ssei.h"
#include "util_ssef.h"
+#include "util_avxf.h"
#endif /* __UTIL_SIMD_TYPES_H__ */
diff --git a/intern/cycles/util/util_stats.h b/intern/cycles/util/util_stats.h
index b970b01..c21a848 100644
--- a/intern/cycles/util/util_stats.h
+++ b/intern/cycles/util/util_stats.h
@@ -29,13 +29,13 @@ public:
explicit Stats(static_init_t) {}
void mem_alloc(size_t size) {
- atomic_add_z(&mem_used, size);
+ atomic_add_and_fetch_z(&mem_used, size);
atomic_update_max_z(&mem_peak, mem_used);
}
void mem_free(size_t size) {
assert(mem_used >= size);
- atomic_sub_z(&mem_used, size);
+ atomic_sub_and_fetch_z(&mem_used, size);
}
size_t mem_used;
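
On the atomic renames throughout this patch: the *_add_and_fetch_* / *_sub_and_fetch_* names spell out that the functions return the updated value (mirroring the GCC __sync_add_and_fetch naming), as opposed to the fetch_and_add variants that return the previous one. A small sketch of the distinction, using std::atomic purely for illustration.

    #include <atomic>
    #include <cassert>
    #include <cstddef>

    int main()
    {
        std::atomic<size_t> mem_used(100);

        size_t new_value = mem_used.fetch_add(28) + 28;  /* add_and_fetch -> 128 */
        assert(new_value == 128);

        size_t old_value = mem_used.fetch_add(28);       /* fetch_and_add -> 128 */
        assert(old_value == 128 && mem_used.load() == 156);
        return 0;
    }
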
diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp
index d5fac9a..87d885c 100644
--- a/intern/cycles/util/util_system.cpp
+++ b/intern/cycles/util/util_system.cpp
@@ -89,6 +89,22 @@ int system_cpu_thread_count()
return count;
}
+unsigned short system_cpu_process_groups(unsigned short max_groups,
+ unsigned short *groups)
+{
+#ifdef _WIN32
+ unsigned short group_count = max_groups;
+ if(!GetProcessGroupAffinity(GetCurrentProcess(), &group_count, groups)) {
+ return 0;
+ }
+ return group_count;
+#else
+ (void) max_groups;
+ (void) groups;
+ return 0;
+#endif
+}
+
#if !defined(_WIN32) || defined(FREE_WINDOWS)
static void __cpuid(int data[4], int selector)
{
diff --git a/intern/cycles/util/util_system.h b/intern/cycles/util/util_system.h
index 557aab6..ff61b26 100644
--- a/intern/cycles/util/util_system.h
+++ b/intern/cycles/util/util_system.h
@@ -30,6 +30,10 @@ int system_cpu_group_thread_count(int group);
/* Get total number of threads in all groups. */
int system_cpu_thread_count();
+/* Get current process groups. */
+unsigned short system_cpu_process_groups(unsigned short max_groups,
+ unsigned short *groups);
+
string system_cpu_brand_string();
int system_cpu_bits();
bool system_cpu_support_sse2();
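
The system_cpu_process_groups() declaration above is what the task scheduler uses to ask how many Windows processor groups the current process may run in; a return of 0 means the query failed or the platform has no such concept. Below is a self-contained sketch of the intended calling pattern, with local stand-ins for the two util_system functions so the example compiles on its own.

    #include <vector>

    /* Stand-ins for the util_system.h API; the real non-Windows call returns 0. */
    static int system_cpu_group_count() { return 1; }
    static unsigned short system_cpu_process_groups(unsigned short /*max_groups*/,
                                                    unsigned short *groups)
    {
        groups[0] = 0;  /* pretend the process is confined to group 0 */
        return 1;
    }

    int main()
    {
        const int num_groups = system_cpu_group_count();
        std::vector<unsigned short> groups(num_groups > 0 ? num_groups : 1);
        const unsigned short found =
                system_cpu_process_groups((unsigned short)groups.size(), &groups[0]);
        /* found == 1 means no affinity forcing is needed as long as the
         * requested thread count fits inside that one group. */
        return found == 1 ? 0 : 1;
    }
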
diff --git a/intern/cycles/util/util_task.cpp b/intern/cycles/util/util_task.cpp
index 352ba81..0d1fed3 100644
--- a/intern/cycles/util/util_task.cpp
+++ b/intern/cycles/util/util_task.cpp
@@ -195,7 +195,8 @@ void TaskScheduler::init(int num_threads)
if(users == 0) {
do_exit = false;
- if(num_threads == 0) {
+ const bool use_auto_threads = (num_threads == 0);
+ if(use_auto_threads) {
/* automatic number of threads */
num_threads = system_cpu_thread_count();
}
@@ -204,7 +205,18 @@ void TaskScheduler::init(int num_threads)
/* launch threads that will be waiting for work */
threads.resize(num_threads);
- int num_groups = system_cpu_group_count();
+ const int num_groups = system_cpu_group_count();
+ unsigned short num_process_groups;
+ vector<unsigned short> process_groups;
+ int current_group_threads;
+ if(num_groups > 1) {
+ process_groups.resize(num_groups);
+ num_process_groups = system_cpu_process_groups(num_groups,
+ &process_groups[0]);
+ if(num_process_groups == 1) {
+ current_group_threads = system_cpu_group_thread_count(process_groups[0]);
+ }
+ }
int thread_index = 0;
for(int group = 0; group < num_groups; ++group) {
/* NOTE: That's not really efficient from threading point of view,
@@ -218,9 +230,25 @@ void TaskScheduler::init(int num_threads)
group_thread < num_group_threads && thread_index < threads.size();
++group_thread, ++thread_index)
{
+ /* NOTE: Thread group of -1 means we would not force thread affinity. */
+ int thread_group;
+ if(num_groups == 1) {
+ /* Use default affinity if there's only one CPU group in the system. */
+ thread_group = -1;
+ }
+ else if(use_auto_threads &&
+ num_process_groups == 1 &&
+ num_threads <= current_group_threads)
+ {
+ /* If we fit into current CPU group we also don't force any affinity. */
+ thread_group = -1;
+ }
+ else {
+ thread_group = group;
+ }
threads[thread_index] = new thread(function_bind(&TaskScheduler::thread_run,
thread_index + 1),
- group);
+ thread_group);
}
}
}
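
Summarizing the new affinity logic in TaskScheduler::init(): a thread group of -1 leaves a thread on the default affinity, and that is now chosen whenever there is a single CPU group, or when the automatically chosen thread count already fits inside the one group the process runs in. A pure-function sketch of that decision follows; parameter names mirror the diff, but this is not the actual scheduler code.

    #include <cassert>

    static int pick_thread_group(int num_groups,
                                 bool use_auto_threads,
                                 unsigned short num_process_groups,
                                 int num_threads,
                                 int current_group_threads,
                                 int group)
    {
        if (num_groups == 1)
            return -1;  /* single CPU group: nothing to pin */
        if (use_auto_threads &&
            num_process_groups == 1 &&
            num_threads <= current_group_threads)
        {
            return -1;  /* auto thread count fits in the current group already */
        }
        return group;   /* otherwise spread threads across groups explicitly */
    }

    int main()
    {
        assert(pick_thread_group(1, true, 1, 8, 16, 0) == -1);
        assert(pick_thread_group(2, true, 1, 8, 16, 1) == -1);
        assert(pick_thread_group(2, false, 2, 64, 32, 1) == 1);
        return 0;
    }
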
diff --git a/intern/cycles/util/util_time.h b/intern/cycles/util/util_time.h
index a5b074b..6579824 100644
--- a/intern/cycles/util/util_time.h
+++ b/intern/cycles/util/util_time.h
@@ -29,7 +29,7 @@ void time_sleep(double t);
class scoped_timer {
public:
- explicit scoped_timer(double *value) : value_(value)
+ explicit scoped_timer(double *value = NULL) : value_(value)
{
time_start_ = time_dt();
}
@@ -40,6 +40,12 @@ public:
*value_ = time_dt() - time_start_;
}
}
+
+ double get_start() const
+ {
+ return time_start_;
+ }
+
protected:
double *value_;
double time_start_;
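
scoped_timer::get_start() exists so Progress::add_skip_time() can subtract paused periods: the caller starts a timer when rendering is interrupted and, once work resumes, shifts the recorded start times forward by the elapsed amount so total and render time exclude the pause. A mocked sketch of the arithmetic; time_dt() is faked here so the numbers are deterministic.

    #include <cassert>

    static double fake_now = 0.0;
    static double time_dt() { return fake_now; }

    struct scoped_timer_sketch {
        scoped_timer_sketch() : start_(time_dt()) {}
        double get_start() const { return start_; }
        double start_;
    };

    int main()
    {
        double render_start_time = time_dt();      /* render begins at t = 0 */
        scoped_timer_sketch pause_timer;           /* user pauses at t = 0   */
        fake_now = 5.0;                            /* ...for five seconds    */

        /* add_skip_time(): shift the start forward by the skipped duration. */
        render_start_time += time_dt() - pause_timer.get_start();

        fake_now = 7.0;                            /* two more seconds of rendering */
        assert(time_dt() - render_start_time == 2.0);
        return 0;
    }
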
diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h
index bfc8f55..a0695f2 100644
--- a/intern/cycles/util/util_transform.h
+++ b/intern/cycles/util/util_transform.h
@@ -73,22 +73,59 @@ ccl_device_inline float3 transform_perspective(const Transform *t, const float3
ccl_device_inline float3 transform_point(const Transform *t, const float3 a)
{
+ /* TODO(sergey): Disabled for now, causes crashes in certain cases. */
+#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
+ ssef x, y, z, w, aa;
+ aa = a.m128;
+
+ x = _mm_loadu_ps(&t->x.x);
+ y = _mm_loadu_ps(&t->y.x);
+ z = _mm_loadu_ps(&t->z.x);
+ w = _mm_loadu_ps(&t->w.x);
+
+ _MM_TRANSPOSE4_PS(x, y, z, w);
+
+ ssef tmp = shuffle<0>(aa) * x;
+ tmp = madd(shuffle<1>(aa), y, tmp);
+ tmp = madd(shuffle<2>(aa), z, tmp);
+ tmp += w;
+
+ return float3(tmp.m128);
+#else
float3 c = make_float3(
a.x*t->x.x + a.y*t->x.y + a.z*t->x.z + t->x.w,
a.x*t->y.x + a.y*t->y.y + a.z*t->y.z + t->y.w,
a.x*t->z.x + a.y*t->z.y + a.z*t->z.z + t->z.w);
return c;
+#endif
}
ccl_device_inline float3 transform_direction(const Transform *t, const float3 a)
{
+#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
+ ssef x, y, z, w, aa;
+ aa = a.m128;
+ x = _mm_loadu_ps(&t->x.x);
+ y = _mm_loadu_ps(&t->y.x);
+ z = _mm_loadu_ps(&t->z.x);
+ w = _mm_setzero_ps();
+
+ _MM_TRANSPOSE4_PS(x, y, z, w);
+
+ ssef tmp = shuffle<0>(aa) * x;
+ tmp = madd(shuffle<1>(aa), y, tmp);
+ tmp = madd(shuffle<2>(aa), z, tmp);
+
+ return float3(tmp.m128);
+#else
float3 c = make_float3(
a.x*t->x.x + a.y*t->x.y + a.z*t->x.z,
a.x*t->y.x + a.y*t->y.y + a.z*t->y.z,
a.x*t->z.x + a.y*t->z.y + a.z*t->z.z);
return c;
+#endif
}
ccl_device_inline float3 transform_direction_transposed(const Transform *t, const float3 a)
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index 6af65f8..a000fae 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -174,6 +174,9 @@ struct ccl_try_align(16) int3 {
__forceinline int3(const __m128i a) : m128(a) {}
__forceinline operator const __m128i&(void) const { return m128; }
__forceinline operator __m128i&(void) { return m128; }
+
+ int3(const int3& a) { m128 = a.m128; }
+ int3& operator =(const int3& a) { m128 = a.m128; return *this; }
#else
int x, y, z, w;
#endif
@@ -193,6 +196,9 @@ struct ccl_try_align(16) int4 {
__forceinline int4(const __m128i a) : m128(a) {}
__forceinline operator const __m128i&(void) const { return m128; }
__forceinline operator __m128i&(void) { return m128; }
+
+ int4(const int4& a) : m128(a.m128) {}
+ int4& operator=(const int4& a) { m128 = a.m128; return *this; }
#else
int x, y, z, w;
#endif
@@ -237,9 +243,12 @@ struct ccl_try_align(16) float3 {
};
__forceinline float3() {}
- __forceinline float3(const __m128 a) : m128(a) {}
+ __forceinline float3(const __m128& a) : m128(a) {}
__forceinline operator const __m128&(void) const { return m128; }
__forceinline operator __m128&(void) { return m128; }
+
+ __forceinline float3(const float3& a) : m128(a.m128) {}
+ __forceinline float3& operator =(const float3& a) { m128 = a.m128; return *this; }
#else
float x, y, z, w;
#endif
@@ -259,6 +268,10 @@ struct ccl_try_align(16) float4 {
__forceinline float4(const __m128 a) : m128(a) {}
__forceinline operator const __m128&(void) const { return m128; }
__forceinline operator __m128&(void) { return m128; }
+
+ __forceinline float4(const float4& a) : m128(a.m128) {}
+ __forceinline float4& operator =(const float4& a) { m128 = a.m128; return *this; }
+
#else
float x, y, z, w;
#endif
diff --git a/intern/cycles/util/util_version.h b/intern/cycles/util/util_version.h
index 186a177..d609c73 100644
--- a/intern/cycles/util/util_version.h
+++ b/intern/cycles/util/util_version.h
@@ -21,9 +21,9 @@
CCL_NAMESPACE_BEGIN
-#define CYCLES_VERSION_MAJOR 1
-#define CYCLES_VERSION_MINOR 7
-#define CYCLES_VERSION_PATCH 0
+#define CYCLES_VERSION_MAJOR 1
+#define CYCLES_VERSION_MINOR 8
+#define CYCLES_VERSION_PATCH 1
#define CYCLES_MAKE_VERSION_STRING2(a,b,c) #a "." #b "." #c
#define CYCLES_MAKE_VERSION_STRING(a,b,c) CYCLES_MAKE_VERSION_STRING2(a,b,c)
diff --git a/intern/cycles/util/util_windows.cpp b/intern/cycles/util/util_windows.cpp
index ee5b3fd..4de8483 100644
--- a/intern/cycles/util/util_windows.cpp
+++ b/intern/cycles/util/util_windows.cpp
@@ -28,6 +28,7 @@ CCL_NAMESPACE_BEGIN
tGetActiveProcessorGroupCount *GetActiveProcessorGroupCount;
tGetActiveProcessorCount *GetActiveProcessorCount;
tSetThreadGroupAffinity *SetThreadGroupAffinity;
+tGetProcessGroupAffinity *GetProcessGroupAffinity;
#endif
static WORD GetActiveProcessorGroupCount_stub()
@@ -50,6 +51,18 @@ static BOOL SetThreadGroupAffinity_stub(
return TRUE;
}
+static BOOL GetProcessGroupAffinity_stub(HANDLE hProcess,
+ PUSHORT GroupCount,
+ PUSHORT GroupArray)
+{
+ if(*GroupCount < 1) {
+ return FALSE;
+ }
+ *GroupCount = 1;
+ GroupArray[0] = 0;
+ return TRUE;
+}
+
static bool supports_numa()
{
#ifndef _M_X64
@@ -72,6 +85,7 @@ void util_windows_init_numa_groups()
GetActiveProcessorGroupCount = GetActiveProcessorGroupCount_stub;
GetActiveProcessorCount = GetActiveProcessorCount_stub;
SetThreadGroupAffinity = SetThreadGroupAffinity_stub;
+ GetProcessGroupAffinity = GetProcessGroupAffinity_stub;
return;
}
HMODULE kernel = GetModuleHandleA("kernel32.dll");
@@ -79,6 +93,7 @@ void util_windows_init_numa_groups()
READ_SYMBOL(GetActiveProcessorGroupCount);
READ_SYMBOL(GetActiveProcessorCount);
READ_SYMBOL(SetThreadGroupAffinity);
+ READ_SYMBOL(GetProcessGroupAffinity);
# undef READ_SUMBOL
#endif
}
diff --git a/intern/cycles/util/util_windows.h b/intern/cycles/util/util_windows.h
index ac61d53..7ea3e65 100644
--- a/intern/cycles/util/util_windows.h
+++ b/intern/cycles/util/util_windows.h
@@ -39,10 +39,14 @@ typedef DWORD tGetActiveProcessorCount(WORD GroupNumber);
typedef BOOL tSetThreadGroupAffinity(HANDLE hThread,
const GROUP_AFFINITY *GroupAffinity,
PGROUP_AFFINITY PreviousGroupAffinity);
+typedef BOOL tGetProcessGroupAffinity(HANDLE hProcess,
+ PUSHORT GroupCount,
+ PUSHORT GroupArray);
extern tGetActiveProcessorGroupCount *GetActiveProcessorGroupCount;
extern tGetActiveProcessorCount *GetActiveProcessorCount;
extern tSetThreadGroupAffinity *SetThreadGroupAffinity;
+extern tGetProcessGroupAffinity *GetProcessGroupAffinity;
#endif
/* Make sure NUMA and processor groups API is initialized. */
diff --git a/intern/elbeem/intern/solver_class.h b/intern/elbeem/intern/solver_class.h
index 593fea1..2b2e214 100644
--- a/intern/elbeem/intern/solver_class.h
+++ b/intern/elbeem/intern/solver_class.h
@@ -332,7 +332,7 @@ class LbmFsgrSolver :
void debugMarkCellCall(int level, int vi,int vj,int vk);
// loop over grid, stream&collide update
- void mainLoop(int lev);
+ void mainLoop(const int lev);
// change time step size
void adaptTimestep();
//! init mObjectSpeeds for current parametrization
diff --git a/intern/elbeem/intern/solver_main.cpp b/intern/elbeem/intern/solver_main.cpp
index 55a8d3e..68f7c04 100644
--- a/intern/elbeem/intern/solver_main.cpp
+++ b/intern/elbeem/intern/solver_main.cpp
@@ -355,7 +355,7 @@ void LbmFsgrSolver::fineAdvance()
//! fine step function
/*****************************************************************************/
void
-LbmFsgrSolver::mainLoop(int lev)
+LbmFsgrSolver::mainLoop(const int lev)
{
// loops over _only inner_ cells -----------------------------------------------------------------------------------
@@ -376,13 +376,16 @@ LbmFsgrSolver::mainLoop(int lev)
// main loop region
const bool doReduce = true;
const int gridLoopBound=1;
+ int calcNumInvIfCells = 0;
+ LbmFloat calcInitialMass = 0;
GRID_REGION_INIT();
#if PARALLEL==1
-#pragma omp parallel default(shared) num_threads(mNumOMPThreads) \
+ const int gDebugLevel = ::gDebugLevel;
+#pragma omp parallel default(none) num_threads(mNumOMPThreads) \
reduction(+: \
calcCurrentMass,calcCurrentVolume, \
calcCellsFilled,calcCellsEmptied, \
- calcNumUsedCells )
+ calcNumUsedCells,calcNumInvIfCells,calcInitialMass)
GRID_REGION_START();
#else // PARALLEL==1
GRID_REGION_START();
@@ -468,7 +471,7 @@ LbmFsgrSolver::mainLoop(int lev)
calcCurrentMass += iniRho;
calcCurrentVolume += 1.0;
calcNumUsedCells++;
- mInitialMass += iniRho;
+ calcInitialMass += iniRho;
// dont treat cell until next step
continue;
}
@@ -479,7 +482,7 @@ LbmFsgrSolver::mainLoop(int lev)
if(isnotValid) {
// remove fluid cells, shouldnt be here anyway
LbmFloat fluidRho = m[0]; FORDF1 { fluidRho += m[l]; }
- mInitialMass -= fluidRho;
+ calcInitialMass -= fluidRho;
const LbmFloat iniRho = 0.0;
RAC(tcel, dMass) = RAC(tcel, dFfrac) = iniRho;
RAC(tcel, dFlux) = FLUX_INIT;
@@ -608,8 +611,8 @@ LbmFsgrSolver::mainLoop(int lev)
// read distribution funtions of adjacent cells = stream step
DEFAULT_STREAM;
- if((nbored & CFFluid)==0) { newFlag |= CFNoNbFluid; mNumInvIfCells++; }
- if((nbored & CFEmpty)==0) { newFlag |= CFNoNbEmpty; mNumInvIfCells++; }
+ if((nbored & CFFluid)==0) { newFlag |= CFNoNbFluid; calcNumInvIfCells++; }
+ if((nbored & CFEmpty)==0) { newFlag |= CFNoNbEmpty; calcNumInvIfCells++; }
// calculate mass exchange for interface cells
LbmFloat myfrac = RAC(ccel,dFfrac);
@@ -809,7 +812,7 @@ LbmFsgrSolver::mainLoop(int lev)
// fill if cells in inflow region
if(myfrac<0.5) {
mass += 0.25;
- mInitialMass += 0.25;
+ calcInitialMass += 0.25;
}
const int OId = oldFlag>>24;
const LbmVec vel(mObjectSpeeds[OId]);
@@ -865,10 +868,8 @@ LbmFsgrSolver::mainLoop(int lev)
// physical drop model
if(mPartUsePhysModel) {
LbmFloat realWorldFac = (mLevel[lev].simCellSize / mLevel[lev].timestep);
- LbmFloat rux = (ux * realWorldFac);
- LbmFloat ruy = (uy * realWorldFac);
- LbmFloat ruz = (uz * realWorldFac);
- LbmFloat rl = norm(ntlVec3Gfx(rux,ruy,ruz));
+ LbmVec ru(ux * realWorldFac, uy * realWorldFac, uz * realWorldFac);
+ LbmFloat rl = norm(ru);
basethresh *= rl;
// reduce probability in outer region?
@@ -960,14 +961,15 @@ LbmFsgrSolver::mainLoop(int lev)
// average normal & velocity
// -> mostly along velocity dir, many into surface
// fluid velocity (not normalized!)
- LbmVec flvelVel = LbmVec(ux,uy,uz);
+ LbmVec flvelVel(ux,uy,uz);
LbmFloat flvelLen = norm(flvelVel);
// surface normal
- LbmVec normVel = LbmVec(surfaceNormal[0],surfaceNormal[1],surfaceNormal[2]);
+ LbmVec normVel(surfaceNormal[0],surfaceNormal[1],surfaceNormal[2]);
normalize(normVel);
LbmFloat normScale = (0.01+flvelLen);
// jitter vector, 0.2 * flvel
- LbmVec jittVel = LbmVec(jx,jy,jz)*(0.05+flvelLen)*0.1;
+ LbmVec jittVel(jx,jy,jz);
+ jittVel *= (0.05+flvelLen)*0.1;
// weighten velocities
const LbmFloat flvelWeight = 0.9;
LbmVec newpartVel = normVel*normScale*(1.-flvelWeight) + flvelVel*(flvelWeight) + jittVel;
@@ -1013,7 +1015,7 @@ LbmFsgrSolver::mainLoop(int lev)
if( (mass) <= (rho * ( -FSGR_MAGICNR)) ) { ifemptied = 1; }
if(oldFlag & (CFMbndOutflow)) {
- mInitialMass -= mass;
+ calcInitialMass -= mass;
mass = myfrac = 0.0;
iffilled = 0; ifemptied = 1;
}
@@ -1105,6 +1107,8 @@ LbmFsgrSolver::mainLoop(int lev)
mNumFilledCells = calcCellsFilled;
mNumEmptiedCells = calcCellsEmptied;
mNumUsedCells = calcNumUsedCells;
+ mNumInvIfCells += calcNumInvIfCells;
+ mInitialMass += calcInitialMass;
}
@@ -1121,7 +1125,8 @@ LbmFsgrSolver::preinitGrids()
GRID_REGION_INIT();
#if PARALLEL==1
-#pragma omp parallel default(shared) num_threads(mNumOMPThreads) \
+ const int gDebugLevel = ::gDebugLevel;
+#pragma omp parallel default(none) num_threads(mNumOMPThreads) \
reduction(+: \
calcCurrentMass,calcCurrentVolume, \
calcCellsFilled,calcCellsEmptied, \
@@ -1158,7 +1163,8 @@ LbmFsgrSolver::standingFluidPreinit()
GRID_REGION_INIT();
#if PARALLEL==1
-#pragma omp parallel default(shared) num_threads(mNumOMPThreads) \
+ const int gDebugLevel = ::gDebugLevel;
+#pragma omp parallel default(none) num_threads(mNumOMPThreads) \
reduction(+: \
calcCurrentMass,calcCurrentVolume, \
calcCellsFilled,calcCellsEmptied, \
diff --git a/intern/guardedalloc/intern/mallocn_guarded_impl.c b/intern/guardedalloc/intern/mallocn_guarded_impl.c
index 1933e9d..76b7e07 100644
--- a/intern/guardedalloc/intern/mallocn_guarded_impl.c
+++ b/intern/guardedalloc/intern/mallocn_guarded_impl.c
@@ -505,8 +505,8 @@ static void make_memhead_header(MemHead *memh, size_t len, const char *str)
memt = (MemTail *)(((char *) memh) + sizeof(MemHead) + len);
memt->tag3 = MEMTAG3;
- atomic_add_u(&totblock, 1);
- atomic_add_z(&mem_in_use, len);
+ atomic_add_and_fetch_u(&totblock, 1);
+ atomic_add_and_fetch_z(&mem_in_use, len);
mem_lock_thread();
addtail(membase, &memh->next);
@@ -638,7 +638,7 @@ void *MEM_guarded_mapallocN(size_t len, const char *str)
if (memh != (MemHead *)-1) {
make_memhead_header(memh, len, str);
memh->mmap = 1;
- atomic_add_z(&mmap_in_use, len);
+ atomic_add_and_fetch_z(&mmap_in_use, len);
mem_lock_thread();
peak_mem = mmap_in_use > peak_mem ? mmap_in_use : peak_mem;
mem_unlock_thread();
@@ -1007,8 +1007,8 @@ static void rem_memblock(MemHead *memh)
}
mem_unlock_thread();
- atomic_sub_u(&totblock, 1);
- atomic_sub_z(&mem_in_use, memh->len);
+ atomic_sub_and_fetch_u(&totblock, 1);
+ atomic_sub_and_fetch_z(&mem_in_use, memh->len);
#ifdef DEBUG_MEMDUPLINAME
if (memh->need_free_name)
@@ -1016,7 +1016,7 @@ static void rem_memblock(MemHead *memh)
#endif
if (memh->mmap) {
- atomic_sub_z(&mmap_in_use, memh->len);
+ atomic_sub_and_fetch_z(&mmap_in_use, memh->len);
#if defined(WIN32)
/* our windows mmap implementation is not thread safe */
mem_lock_thread();
diff --git a/intern/guardedalloc/intern/mallocn_lockfree_impl.c b/intern/guardedalloc/intern/mallocn_lockfree_impl.c
index a80d67c..ce8a5b2 100644
--- a/intern/guardedalloc/intern/mallocn_lockfree_impl.c
+++ b/intern/guardedalloc/intern/mallocn_lockfree_impl.c
@@ -142,11 +142,11 @@ void MEM_lockfree_freeN(void *vmemh)
return;
}
- atomic_sub_u(&totblock, 1);
- atomic_sub_z(&mem_in_use, len);
+ atomic_sub_and_fetch_u(&totblock, 1);
+ atomic_sub_and_fetch_z(&mem_in_use, len);
if (MEMHEAD_IS_MMAP(memh)) {
- atomic_sub_z(&mmap_in_use, len);
+ atomic_sub_and_fetch_z(&mmap_in_use, len);
#if defined(WIN32)
/* our windows mmap implementation is not thread safe */
mem_lock_thread();
@@ -287,8 +287,8 @@ void *MEM_lockfree_callocN(size_t len, const char *str)
if (LIKELY(memh)) {
memh->len = len;
- atomic_add_u(&totblock, 1);
- atomic_add_z(&mem_in_use, len);
+ atomic_add_and_fetch_u(&totblock, 1);
+ atomic_add_and_fetch_z(&mem_in_use, len);
update_maximum(&peak_mem, mem_in_use);
return PTR_FROM_MEMHEAD(memh);
@@ -312,8 +312,8 @@ void *MEM_lockfree_mallocN(size_t len, const char *str)
}
memh->len = len;
- atomic_add_u(&totblock, 1);
- atomic_add_z(&mem_in_use, len);
+ atomic_add_and_fetch_u(&totblock, 1);
+ atomic_add_and_fetch_z(&mem_in_use, len);
update_maximum(&peak_mem, mem_in_use);
return PTR_FROM_MEMHEAD(memh);
@@ -361,8 +361,8 @@ void *MEM_lockfree_mallocN_aligned(size_t len, size_t alignment, const char *str
memh->len = len | (size_t) MEMHEAD_ALIGN_FLAG;
memh->alignment = (short) alignment;
- atomic_add_u(&totblock, 1);
- atomic_add_z(&mem_in_use, len);
+ atomic_add_and_fetch_u(&totblock, 1);
+ atomic_add_and_fetch_z(&mem_in_use, len);
update_maximum(&peak_mem, mem_in_use);
return PTR_FROM_MEMHEAD(memh);
@@ -396,9 +396,9 @@ void *MEM_lockfree_mapallocN(size_t len, const char *str)
if (memh != (MemHead *)-1) {
memh->len = len | (size_t) MEMHEAD_MMAP_FLAG;
- atomic_add_u(&totblock, 1);
- atomic_add_z(&mem_in_use, len);
- atomic_add_z(&mmap_in_use, len);
+ atomic_add_and_fetch_u(&totblock, 1);
+ atomic_add_and_fetch_z(&mem_in_use, len);
+ atomic_add_and_fetch_z(&mmap_in_use, len);
update_maximum(&peak_mem, mem_in_use);
update_maximum(&peak_mem, mmap_in_use);
diff --git a/intern/iksolver/intern/IK_QSegment.h b/intern/iksolver/intern/IK_QSegment.h
index 74f157a..247807d 100644
--- a/intern/iksolver/intern/IK_QSegment.h
+++ b/intern/iksolver/intern/IK_QSegment.h
@@ -60,6 +60,7 @@
class IK_QSegment
{
public:
+ EIGEN_MAKE_ALIGNED_OPERATOR_NEW
virtual ~IK_QSegment();
// start: a user defined translation
diff --git a/intern/iksolver/intern/IK_Solver.cpp b/intern/iksolver/intern/IK_Solver.cpp
index cefb8c7..a00db4f 100644
--- a/intern/iksolver/intern/IK_Solver.cpp
+++ b/intern/iksolver/intern/IK_Solver.cpp
@@ -42,6 +42,7 @@ using namespace std;
class IK_QSolver {
public:
+ EIGEN_MAKE_ALIGNED_OPERATOR_NEW
IK_QSolver() : root(NULL) {
}
diff --git a/release/scripts/addons/render_auto_tile_size.py b/release/scripts/addons/render_auto_tile_size.py
index 8aef71d..3625c0e 100644
--- a/release/scripts/addons/render_auto_tile_size.py
+++ b/release/scripts/addons/render_auto_tile_size.py
@@ -146,7 +146,9 @@ def ats_poll(context):
def engine_is_gpu(engine, device, userpref):
- return engine == 'CYCLES' and device == 'GPU' and userpref.system.compute_device_type != 'NONE'
+ if engine == 'CYCLES' and device == 'GPU':
+ return userpref.addons['cycles'].preferences.has_active_device()
+ return False
def get_tilesize_prop(engine, device, userpref):
@@ -206,11 +208,7 @@ def get_threads(context, device):
userpref = context.user_preferences
if engine_is_gpu(engine, device, userpref):
- gpu_device_str = userpref.system.compute_device
- if 'MULTI' in gpu_device_str:
- threads = int(gpu_device_str.split('_')[-1])
- else:
- threads = 1
+ threads = userpref.addons['cycles'].preferences.get_num_gpu_devices()
else:
threads = render.threads
diff --git a/release/scripts/startup/bl_operators/wm.py b/release/scripts/startup/bl_operators/wm.py
index 1c97d21..343fcdb 100644
--- a/release/scripts/startup/bl_operators/wm.py
+++ b/release/scripts/startup/bl_operators/wm.py
@@ -2163,3 +2163,32 @@ class WM_OT_addon_expand(Operator):
info["show_expanded"] = not info["show_expanded"]
return {'FINISHED'}
+
+class WM_OT_addon_userpref_show(Operator):
+ "Show add-on user preferences"
+ bl_idname = "wm.addon_userpref_show"
+ bl_label = ""
+ bl_options = {'INTERNAL'}
+
+ module = StringProperty(
+ name="Module",
+ description="Module name of the add-on to show preferences for",
+ )
+
+ def execute(self, context):
+ import addon_utils
+
+ module_name = self.module
+
+ modules = addon_utils.modules(refresh=False)
+ mod = addon_utils.addons_fake_modules.get(module_name)
+ if mod is not None:
+ info = addon_utils.module_bl_info(mod)
+ info["show_expanded"] = True
+
+ bpy.context.user_preferences.active_section = 'ADDONS'
+ context.window_manager.addon_filter = 'All'
+ context.window_manager.addon_search = info["name"]
+ bpy.ops.screen.userpref_show('INVOKE_DEFAULT')
+
+ return {'FINISHED'}
diff --git a/release/scripts/startup/bl_ui/space_userpref.py b/release/scripts/startup/bl_ui/space_userpref.py
index dcafac6..bdbb633 100644
--- a/release/scripts/startup/bl_ui/space_userpref.py
+++ b/release/scripts/startup/bl_ui/space_userpref.py
@@ -429,12 +429,8 @@ class USERPREF_PT_system(Panel):
col.separator()
- if hasattr(system, "compute_device_type"):
- col.label(text="Compute Device:")
- col.row().prop(system, "compute_device_type", expand=True)
- sub = col.row()
- sub.active = system.compute_device_type != 'CPU'
- sub.prop(system, "compute_device", text="")
+ if userpref.addons.find('cycles') != -1:
+ userpref.addons['cycles'].preferences.draw_impl(col, context)
if hasattr(system, "opensubdiv_compute_type"):
col.label(text="OpenSubdiv compute:")
diff --git a/source/blender/blenkernel/BKE_armature.h b/source/blender/blenkernel/BKE_armature.h
index c232310..78d6f6c 100644
--- a/source/blender/blenkernel/BKE_armature.h
+++ b/source/blender/blenkernel/BKE_armature.h
@@ -97,6 +97,7 @@ void BKE_armature_where_is(struct bArmature *arm);
void BKE_armature_where_is_bone(struct Bone *bone, struct Bone *prevbone, const bool use_recursion);
void BKE_pose_clear_pointers(struct bPose *pose);
void BKE_pose_rebuild(struct Object *ob, struct bArmature *arm);
+void BKE_pose_rebuild_ex(struct Object *ob, struct bArmature *arm, const bool sort_bones);
void BKE_pose_where_is(struct Scene *scene, struct Object *ob);
void BKE_pose_where_is_bone(struct Scene *scene, struct Object *ob, struct bPoseChannel *pchan, float ctime, bool do_extra);
void BKE_pose_where_is_bone_tail(struct bPoseChannel *pchan);
diff --git a/source/blender/blenkernel/BKE_blender_version.h b/source/blender/blenkernel/BKE_blender_version.h
index aba5f9e..6dad0d7 100644
--- a/source/blender/blenkernel/BKE_blender_version.h
+++ b/source/blender/blenkernel/BKE_blender_version.h
@@ -35,7 +35,7 @@
/* used by packaging tools */
/* can be left blank, otherwise a,b,c... etc with no quotes */
-#define BLENDER_VERSION_CHAR a
+#define BLENDER_VERSION_CHAR b
/* alpha/beta/rc/release, docs use this */
#define BLENDER_VERSION_CYCLE release
diff --git a/source/blender/blenkernel/intern/armature.c b/source/blender/blenkernel/intern/armature.c
index c644fe0..aaec3a9 100644
--- a/source/blender/blenkernel/intern/armature.c
+++ b/source/blender/blenkernel/intern/armature.c
@@ -1916,7 +1916,7 @@ void BKE_pose_clear_pointers(bPose *pose)
/* only after leave editmode, duplicating, validating older files, library syncing */
/* NOTE: pose->flag is set for it */
-void BKE_pose_rebuild(Object *ob, bArmature *arm)
+void BKE_pose_rebuild_ex(Object *ob, bArmature *arm, const bool sort_bones)
{
Bone *bone;
bPose *pose;
@@ -1963,8 +1963,9 @@ void BKE_pose_rebuild(Object *ob, bArmature *arm)
#ifdef WITH_LEGACY_DEPSGRAPH
/* the sorting */
/* Sorting for new dependnecy graph is done on the scene graph level. */
- if (counter > 1)
+ if (counter > 1 && sort_bones) {
DAG_pose_sort(ob);
+ }
#endif
ob->pose->flag &= ~POSE_RECALC;
@@ -1973,6 +1974,11 @@ void BKE_pose_rebuild(Object *ob, bArmature *arm)
BKE_pose_channels_hash_make(ob->pose);
}
+void BKE_pose_rebuild(Object *ob, bArmature *arm)
+{
+ BKE_pose_rebuild_ex(ob, arm, true);
+}
+
/* ********************** THE POSE SOLVER ******************* */
/* loc/rot/size to given mat4 */
diff --git a/source/blender/blenkernel/intern/depsgraph.c b/source/blender/blenkernel/intern/depsgraph.c
index 5f8332d..8a16b3e 100644
--- a/source/blender/blenkernel/intern/depsgraph.c
+++ b/source/blender/blenkernel/intern/depsgraph.c
@@ -800,6 +800,10 @@ static void build_dag_object(DagForest *dag, DagNode *scenenode, Main *bmain, Sc
/* Actual code uses get_collider_cache */
dag_add_collision_relations(dag, scene, ob, node, part->collision_group, ob->lay, eModifierType_Collision, NULL, true, "Particle Collision");
}
+ else if ((psys->flag & PSYS_HAIR_DYNAMICS) && psys->clmd && psys->clmd->coll_parms) {
+ /* Hair uses cloth simulation, i.e. get_collision_objects */
+ dag_add_collision_relations(dag, scene, ob, node, psys->clmd->coll_parms->group, ob->lay | scene->lay, eModifierType_Collision, NULL, true, "Hair Collision");
+ }
dag_add_forcefield_relations(dag, scene, ob, node, part->effector_weights, part->type == PART_HAIR, 0, "Particle Force Field");
@@ -3284,7 +3288,7 @@ void DAG_threaded_update_handle_node_updated(void *node_v,
for (itA = node->child; itA; itA = itA->next) {
DagNode *child_node = itA->node;
if (child_node != node) {
- atomic_sub_uint32(&child_node->num_pending_parents, 1);
+ atomic_sub_and_fetch_uint32(&child_node->num_pending_parents, 1);
if (child_node->num_pending_parents == 0) {
bool need_schedule;
diff --git a/source/blender/blenkernel/intern/dynamicpaint.c b/source/blender/blenkernel/intern/dynamicpaint.c
index 51bc5fc..5d660be 100644
--- a/source/blender/blenkernel/intern/dynamicpaint.c
+++ b/source/blender/blenkernel/intern/dynamicpaint.c
@@ -2257,7 +2257,7 @@ static void dynamic_paint_create_uv_surface_neighbor_cb(void *userdata, const in
* to non--1 *before* its tri_index is set (i.e. that it cannot be used a neighbour).
*/
tPoint->neighbour_pixel = ind - 1;
- atomic_add_uint32(&tPoint->neighbour_pixel, 1);
+ atomic_add_and_fetch_uint32(&tPoint->neighbour_pixel, 1);
tPoint->tri_index = i;
/* Now calculate pixel data for this pixel as it was on polygon surface */
@@ -2283,7 +2283,7 @@ static void dynamic_paint_create_uv_surface_neighbor_cb(void *userdata, const in
/* Increase the final number of active surface points if relevant. */
if (tPoint->tri_index != -1)
- atomic_add_uint32(active_points, 1);
+ atomic_add_and_fetch_uint32(active_points, 1);
}
}
diff --git a/source/blender/blenkernel/intern/mesh_evaluate.c b/source/blender/blenkernel/intern/mesh_evaluate.c
index fa113ef..a3fe73e 100644
--- a/source/blender/blenkernel/intern/mesh_evaluate.c
+++ b/source/blender/blenkernel/intern/mesh_evaluate.c
@@ -58,6 +58,7 @@
#include "BLI_strict_flags.h"
+#include "atomic_ops.h"
#include "mikktspace.h"
// #define DEBUG_TIME
@@ -236,7 +237,9 @@ static void mesh_calc_normals_poly_accum_task_cb(void *userdata, const int pidx)
const float fac = saacos(-dot_v3v3(cur_edge, prev_edge));
/* accumulate */
- madd_v3_v3fl(vnors[ml[i].v], pnor, fac);
+ for (int k = 3; k--; ) {
+ atomic_add_and_fetch_fl(&vnors[ml[i].v][k], pnor[k] * fac);
+ }
prev_edge = cur_edge;
}
}
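
The mesh_evaluate.c change above (like the matching pbvh.c one below) accumulates face normals into shared per-vertex normals one component at a time with atomic_add_and_fetch_fl(), since several polygons may touch the same vertex from different threads. Floats have no native atomic add on most targets, so such a helper is typically a compare-and-swap loop; the following sketch uses std::atomic purely for illustration.

    #include <atomic>
    #include <cassert>

    static float atomic_add_float(std::atomic<float> &target, float value)
    {
        float expected = target.load();
        /* Retry until no other thread modified the value between load and store. */
        while (!target.compare_exchange_weak(expected, expected + value)) {
        }
        return expected + value;
    }

    int main()
    {
        std::atomic<float> vnor_x(0.0f);
        atomic_add_float(vnor_x, 0.25f);   /* e.g. pnor[0] * fac from one face */
        atomic_add_float(vnor_x, 0.50f);   /* ...and from a neighbouring face  */
        assert(vnor_x.load() == 0.75f);
        return 0;
    }
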
diff --git a/source/blender/blenkernel/intern/pbvh.c b/source/blender/blenkernel/intern/pbvh.c
index ff69f38..4fe4d6e 100644
--- a/source/blender/blenkernel/intern/pbvh.c
+++ b/source/blender/blenkernel/intern/pbvh.c
@@ -977,7 +977,7 @@ static void pbvh_update_normals_accum_task_cb(void *userdata, const int n)
* Not exact equivalent though, since atomicity is only ensured for one component
* of the vector at a time, but here it shall not make any sensible difference. */
for (int k = 3; k--; ) {
- atomic_add_fl(&vnors[v][k], fn[k]);
+ atomic_add_and_fetch_fl(&vnors[v][k], fn[k]);
}
}
}
diff --git a/source/blender/blenlib/intern/task.c b/source/blender/blenlib/intern/task.c
index 436cd2b..fc2d967 100644
--- a/source/blender/blenlib/intern/task.c
+++ b/source/blender/blenlib/intern/task.c
@@ -237,7 +237,7 @@ static void task_pool_num_decrease(TaskPool *pool, size_t done)
BLI_assert(pool->num >= done);
pool->num -= done;
- atomic_sub_z(&pool->currently_running_tasks, done);
+ atomic_sub_and_fetch_z(&pool->currently_running_tasks, done);
pool->done += done;
if (pool->num == 0)
@@ -292,7 +292,7 @@ static bool task_scheduler_thread_wait_pop(TaskScheduler *scheduler, Task **task
continue;
}
- if (atomic_add_z(&pool->currently_running_tasks, 1) <= pool->num_threads ||
+ if (atomic_add_and_fetch_z(&pool->currently_running_tasks, 1) <= pool->num_threads ||
pool->num_threads == 0)
{
*task = current_task;
@@ -301,7 +301,7 @@ static bool task_scheduler_thread_wait_pop(TaskScheduler *scheduler, Task **task
break;
}
else {
- atomic_sub_z(&pool->currently_running_tasks, 1);
+ atomic_sub_and_fetch_z(&pool->currently_running_tasks, 1);
}
}
if (!found_task)
@@ -669,7 +669,7 @@ void BLI_task_pool_work_and_wait(TaskPool *pool)
/* if found task, do it, otherwise wait until other tasks are done */
if (found_task) {
/* run task */
- atomic_add_z(&pool->currently_running_tasks, 1);
+ atomic_add_and_fetch_z(&pool->currently_running_tasks, 1);
work_task->run(pool, work_task->taskdata, 0);
/* delete task */
diff --git a/source/blender/compositor/intern/COM_ExecutionGroup.cpp b/source/blender/compositor/intern/COM_ExecutionGroup.cpp
index e5c2b8a..9a47c6b 100644
--- a/source/blender/compositor/intern/COM_ExecutionGroup.cpp
+++ b/source/blender/compositor/intern/COM_ExecutionGroup.cpp
@@ -383,7 +383,7 @@ void ExecutionGroup::finalizeChunkExecution(int chunkNumber, MemoryBuffer **memo
if (this->m_chunkExecutionStates[chunkNumber] == COM_ES_SCHEDULED)
this->m_chunkExecutionStates[chunkNumber] = COM_ES_EXECUTED;
- atomic_add_u(&this->m_chunksFinished, 1);
+ atomic_add_and_fetch_u(&this->m_chunksFinished, 1);
if (memoryBuffers) {
for (unsigned int index = 0; index < this->m_cachedMaxReadBufferOffset; index++) {
MemoryBuffer *buffer = memoryBuffers[index];
diff --git a/source/blender/compositor/operations/COM_TextureOperation.cpp b/source/blender/compositor/operations/COM_TextureOperation.cpp
index bba5c87..6bfd8ae 100644
--- a/source/blender/compositor/operations/COM_TextureOperation.cpp
+++ b/source/blender/compositor/operations/COM_TextureOperation.cpp
@@ -118,7 +118,7 @@ void TextureBaseOperation::executePixelSampled(float output[4], float x, float y
* interpolaiton and (b) in such configuration multitex() sinply floor's the value
* which often produces artifacts.
*/
- if ((m_texture->imaflag & TEX_INTERPOL) == 0) {
+ if (m_texture != NULL && (m_texture->imaflag & TEX_INTERPOL) == 0) {
u += 0.5f / cx;
v += 0.5f / cy;
}
diff --git a/source/blender/depsgraph/CMakeLists.txt b/source/blender/depsgraph/CMakeLists.txt
index fd2a521..e635256 100644
--- a/source/blender/depsgraph/CMakeLists.txt
+++ b/source/blender/depsgraph/CMakeLists.txt
@@ -43,8 +43,13 @@ set(SRC
intern/builder/deg_builder.cc
intern/builder/deg_builder_cycle.cc
intern/builder/deg_builder_nodes.cc
+ intern/builder/deg_builder_nodes_rig.cc
+ intern/builder/deg_builder_nodes_scene.cc
intern/builder/deg_builder_pchanmap.cc
intern/builder/deg_builder_relations.cc
+ intern/builder/deg_builder_relations_keys.cc
+ intern/builder/deg_builder_relations_rig.cc
+ intern/builder/deg_builder_relations_scene.cc
intern/builder/deg_builder_transitive.cc
intern/debug/deg_debug_graphviz.cc
intern/eval/deg_eval.cc
diff --git a/source/blender/depsgraph/DEG_depsgraph_build.h b/source/blender/depsgraph/DEG_depsgraph_build.h
index 0945da4..fdc8654 100644
--- a/source/blender/depsgraph/DEG_depsgraph_build.h
+++ b/source/blender/depsgraph/DEG_depsgraph_build.h
@@ -51,8 +51,12 @@ extern "C" {
/* Graph Building -------------------------------- */
-/* Build depsgraph for the given scene, and dump results in given graph container */
-void DEG_graph_build_from_scene(struct Depsgraph *graph, struct Main *bmain, struct Scene *scene);
+/* Build depsgraph for the given scene, and dump results in given
+ * graph container.
+ */
+void DEG_graph_build_from_scene(struct Depsgraph *graph,
+ struct Main *bmain,
+ struct Scene *scene);
/* Tag relations from the given graph for update. */
void DEG_graph_tag_relations_update(struct Depsgraph *graph);
@@ -85,31 +89,69 @@ struct CacheFile;
struct Object;
typedef enum eDepsSceneComponentType {
- DEG_SCENE_COMP_PARAMETERS, /* Parameters Component - Default when nothing else fits (i.e. just SDNA property setting) */
- DEG_SCENE_COMP_ANIMATION, /* Animation Component */ // XXX: merge in with parameters?
- DEG_SCENE_COMP_SEQUENCER, /* Sequencer Component (Scene Only) */
+ /* Parameters Component - Default when nothing else fits
+ * (i.e. just SDNA property setting).
+ */
+ DEG_SCENE_COMP_PARAMETERS,
+ /* Animation Component
+ * TODO(sergey): merge in with parameters?
+ */
+ DEG_SCENE_COMP_ANIMATION,
+ /* Sequencer Component (Scene Only). */
+ DEG_SCENE_COMP_SEQUENCER,
} eDepsSceneComponentType;
typedef enum eDepsObjectComponentType {
- DEG_OB_COMP_PARAMETERS, /* Parameters Component - Default when nothing else fits (i.e. just SDNA property setting) */
- DEG_OB_COMP_PROXY, /* Generic "Proxy-Inherit" Component */ // XXX: Also for instancing of subgraphs?
- DEG_OB_COMP_ANIMATION, /* Animation Component */ // XXX: merge in with parameters?
- DEG_OB_COMP_TRANSFORM, /* Transform Component (Parenting/Constraints) */
- DEG_OB_COMP_GEOMETRY, /* Geometry Component (DerivedMesh/Displist) */
-
+ /* Parameters Component - Default when nothing else fits
+ * (i.e. just SDNA property setting).
+ */
+ DEG_OB_COMP_PARAMETERS,
+ /* Generic "Proxy-Inherit" Component.
+ * TODO(sergey): Also for instancing of subgraphs?
+ */
+ DEG_OB_COMP_PROXY,
+ /* Animation Component.
+ *
+ * TODO(sergey): merge in with parameters?
+ */
+ DEG_OB_COMP_ANIMATION,
+ /* Transform Component (Parenting/Constraints) */
+ DEG_OB_COMP_TRANSFORM,
+ /* Geometry Component (DerivedMesh/Displist) */
+ DEG_OB_COMP_GEOMETRY,
+
/* Evaluation-Related Outer Types (with Subdata) */
- DEG_OB_COMP_EVAL_POSE, /* Pose Component - Owner/Container of Bones Eval */
- DEG_OB_COMP_BONE, /* Bone Component - Child/Subcomponent of Pose */
-
- DEG_OB_COMP_EVAL_PARTICLES, /* Particle Systems Component */
- DEG_OB_COMP_SHADING, /* Material Shading Component */
- DEG_OB_COMP_CACHE, /* Cache Component */
+
+ /* Pose Component - Owner/Container of Bones Eval */
+ DEG_OB_COMP_EVAL_POSE,
+ /* Bone Component - Child/Subcomponent of Pose */
+ DEG_OB_COMP_BONE,
+
+ /* Particle Systems Component */
+ DEG_OB_COMP_EVAL_PARTICLES,
+ /* Material Shading Component */
+ DEG_OB_COMP_SHADING,
+ /* Cache Component */
+ DEG_OB_COMP_CACHE,
} eDepsObjectComponentType;
-void DEG_add_scene_relation(struct DepsNodeHandle *node, struct Scene *scene, eDepsSceneComponentType component, const char *description);
-void DEG_add_object_relation(struct DepsNodeHandle *node, struct Object *ob, eDepsObjectComponentType component, const char *description);
-void DEG_add_bone_relation(struct DepsNodeHandle *handle, struct Object *ob, const char *bone_name, eDepsObjectComponentType component, const char *description);
-void DEG_add_object_cache_relation(struct DepsNodeHandle *handle, struct CacheFile *cache_file, eDepsObjectComponentType component, const char *description);
+void DEG_add_scene_relation(struct DepsNodeHandle *node,
+ struct Scene *scene,
+ eDepsSceneComponentType component,
+ const char *description);
+void DEG_add_object_relation(struct DepsNodeHandle *node,
+ struct Object *ob,
+ eDepsObjectComponentType component,
+ const char *description);
+void DEG_add_bone_relation(struct DepsNodeHandle *handle,
+ struct Object *ob,
+ const char *bone_name,
+ eDepsObjectComponentType component,
+ const char *description);
+void DEG_add_object_cache_relation(struct DepsNodeHandle *handle,
+ struct CacheFile *cache_file,
+ eDepsObjectComponentType component,
+ const char *description);
/* TODO(sergey): Remove once all geometry update is granular. */
void DEG_add_special_eval_flag(struct Depsgraph *graph, struct ID *id, short flag);
@@ -117,8 +159,22 @@ void DEG_add_special_eval_flag(struct Depsgraph *graph, struct ID *id, short fla
/* Utility functions for physics modifiers */
typedef bool (*DEG_CollobjFilterFunction)(struct Object *obj, struct ModifierData *md);
-void DEG_add_collision_relations(struct DepsNodeHandle *handle, struct Scene *scene, Object *ob, struct Group *group, int layer, unsigned int modifier_type, DEG_CollobjFilterFunction fn, bool dupli, const char *name);
-void DEG_add_forcefield_relations(struct DepsNodeHandle *handle, struct Scene *scene, Object *ob, struct EffectorWeights *eff, bool add_absorption, int skip_forcefield, const char *name);
+void DEG_add_collision_relations(struct DepsNodeHandle *handle,
+ struct Scene *scene,
+ Object *ob,
+ struct Group *group,
+ int layer,
+ unsigned int modifier_type,
+ DEG_CollobjFilterFunction fn,
+ bool dupli,
+ const char *name);
+void DEG_add_forcefield_relations(struct DepsNodeHandle *handle,
+ struct Scene *scene,
+ Object *ob,
+ struct EffectorWeights *eff,
+ bool add_absorption,
+ int skip_forcefield,
+ const char *name);
/* ************************************************ */
diff --git a/source/blender/depsgraph/intern/builder/deg_builder.cc b/source/blender/depsgraph/intern/builder/deg_builder.cc
index 6169100..cb2f057 100644
--- a/source/blender/depsgraph/intern/builder/deg_builder.cc
+++ b/source/blender/depsgraph/intern/builder/deg_builder.cc
@@ -34,6 +34,8 @@
#include <stack>
#include "DNA_anim_types.h"
+#include "DNA_object_types.h"
+#include "DNA_ID.h"
#include "BLI_utildefines.h"
#include "BLI_ghash.h"
@@ -46,6 +48,8 @@
#include "util/deg_util_foreach.h"
+#include <cstdio>
+
namespace DEG {
string deg_fcurve_id_name(const FCurve *fcu)
@@ -56,10 +60,46 @@ string deg_fcurve_id_name(const FCurve *fcu)
return string(fcu->rna_path) + index_buf;
}
+static bool check_object_needs_evaluation(Object *object)
+{
+ if (object->recalc & OB_RECALC_ALL) {
+ /* Object is tagged for update anyway, no need to re-tag it. */
+ return false;
+ }
+ if (object->type == OB_MESH) {
+ return object->derivedFinal == NULL;
+ }
+ else if (ELEM(object->type,
+ OB_CURVE, OB_SURF, OB_FONT, OB_MBALL, OB_LATTICE))
+ {
+ return object->curve_cache == NULL;
+ }
+ return false;
+}
+
void deg_graph_build_finalize(Depsgraph *graph)
{
+ /* STEP 1: Make sure new invisible dependencies are ready for use.
+ *
+ * TODO(sergey): This might do a bit of extra tagging, but it's kinda nice
+ * to do it ahead of time and not spend time on flushing updates on
+ * every frame change.
+ */
+ GHASH_FOREACH_BEGIN(IDDepsNode *, id_node, graph->id_hash)
+ {
+ if (id_node->layers == 0) {
+ ID *id = id_node->id;
+ if (GS(id->name) == ID_OB) {
+ Object *object = (Object *)id;
+ if (check_object_needs_evaluation(object)) {
+ id_node->tag_update(graph);
+ }
+ }
+ }
+ }
+ GHASH_FOREACH_END();
+ /* STEP 2: Flush visibility layers from children to parent. */
std::stack<OperationDepsNode *> stack;
-
foreach (OperationDepsNode *node, graph->operations) {
IDDepsNode *id_node = node->owner->owner;
node->done = 0;
@@ -78,7 +118,6 @@ void deg_graph_build_finalize(Depsgraph *graph)
node->owner->layers = id_node->layers;
id_node->id->tag |= LIB_TAG_DOIT;
}
-
while (!stack.empty()) {
OperationDepsNode *node = stack.top();
stack.pop();
@@ -104,8 +143,9 @@ void deg_graph_build_finalize(Depsgraph *graph)
}
}
}
-
- /* Re-tag IDs for update if it was tagged before the relations update tag. */
+ /* STEP 3: Re-tag IDs for update if it was tagged before the relations
+ * update tag.
+ */
GHASH_FOREACH_BEGIN(IDDepsNode *, id_node, graph->id_hash)
{
GHASH_FOREACH_BEGIN(ComponentDepsNode *, comp, id_node->components)
@@ -114,12 +154,21 @@ void deg_graph_build_finalize(Depsgraph *graph)
}
GHASH_FOREACH_END();
- ID *id = id_node->id;
- if (id->tag & LIB_TAG_ID_RECALC_ALL &&
- id->tag & LIB_TAG_DOIT)
- {
- id_node->tag_update(graph);
- id->tag &= ~LIB_TAG_DOIT;
+ if ((id_node->layers & graph->layers) != 0) {
+ ID *id = id_node->id;
+ if ((id->tag & LIB_TAG_ID_RECALC_ALL) &&
+ (id->tag & LIB_TAG_DOIT))
+ {
+ id_node->tag_update(graph);
+ id->tag &= ~LIB_TAG_DOIT;
+ }
+ else if (GS(id->name) == ID_OB) {
+ Object *object = (Object *)id;
+ if (object->recalc & OB_RECALC_ALL) {
+ id_node->tag_update(graph);
+ id->tag &= ~LIB_TAG_DOIT;
+ }
+ }
}
id_node->finalize_build();
}
diff --git a/source/blender/depsgraph/intern/builder/deg_builder_cycle.cc b/source/blender/depsgraph/intern/builder/deg_builder_cycle.cc
index 225cc64..9b37aaa 100644
--- a/source/blender/depsgraph/intern/builder/deg_builder_cycle.cc
+++ b/source/blender/depsgraph/intern/builder/deg_builder_cycle.cc
@@ -56,12 +56,14 @@ struct StackEntry {
void deg_graph_detect_cycles(Depsgraph *graph)
{
- /* Not is not visited at all during traversal. */
- const int NODE_NOT_VISITED = 0;
- /* Node has been visited during traversal and not in current stack. */
- const int NODE_VISITED = 1;
- /* Node has been visited during traversal and is in current stack. */
- const int NODE_IN_STACK = 2;
+ enum {
+ /* Node is not visited at all during traversal. */
+ NODE_NOT_VISITED = 0,
+ /* Node has been visited during traversal and not in current stack. */
+ NODE_VISITED = 1,
+ /* Node has been visited during traversal and is in current stack. */
+ NODE_IN_STACK = 2,
+ };
std::stack<StackEntry> traversal_stack;
foreach (OperationDepsNode *node, graph->operations) {
@@ -77,21 +79,23 @@ void deg_graph_detect_cycles(Depsgraph *graph)
entry.from = NULL;
entry.via_relation = NULL;
traversal_stack.push(entry);
- node->done = NODE_IN_STACK;
+ node->tag = NODE_IN_STACK;
}
else {
- node->done = NODE_NOT_VISITED;
+ node->tag = NODE_NOT_VISITED;
}
+ node->done = 0;
}
while (!traversal_stack.empty()) {
- StackEntry &entry = traversal_stack.top();
+ StackEntry& entry = traversal_stack.top();
OperationDepsNode *node = entry.node;
bool all_child_traversed = true;
- foreach (DepsRelation *rel, node->outlinks) {
+ for (int i = node->done; i < node->outlinks.size(); ++i) {
+ DepsRelation *rel = node->outlinks[i];
if (rel->to->type == DEPSNODE_TYPE_OPERATION) {
OperationDepsNode *to = (OperationDepsNode *)rel->to;
- if (to->done == NODE_IN_STACK) {
+ if (to->tag == NODE_IN_STACK) {
printf("Dependency cycle detected:\n");
printf(" '%s' depends on '%s' through '%s'\n",
to->full_identifier().c_str(),
@@ -107,23 +111,24 @@ void deg_graph_detect_cycles(Depsgraph *graph)
current->via_relation->name);
current = current->from;
}
- /* TODO(sergey): So called roussian rlette cycle solver. */
+ /* TODO(sergey): So called russian roulette cycle solver. */
rel->flag |= DEPSREL_FLAG_CYCLIC;
}
- else if (to->done == NODE_NOT_VISITED) {
+ else if (to->tag == NODE_NOT_VISITED) {
StackEntry new_entry;
new_entry.node = to;
new_entry.from = &entry;
new_entry.via_relation = rel;
traversal_stack.push(new_entry);
- to->done = NODE_IN_STACK;
+ to->tag = NODE_IN_STACK;
all_child_traversed = false;
+ node->done = i;
break;
}
}
}
if (all_child_traversed) {
- node->done = NODE_VISITED;
+ node->tag = NODE_VISITED;
traversal_stack.pop();
}
}
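
The cycle detector above splits the per-node state into a visitation tag and reuses `done` as a resume index into the outgoing links, so a node that is pushed back onto the stack continues scanning where it stopped instead of rescanning earlier children. Below is a self-contained sketch of the same iterative DFS idea on a plain adjacency list; the types are illustrative only, not the Blender node structures:

    #include <cstdio>
    #include <stack>
    #include <vector>

    enum Tag { NOT_VISITED, VISITED, IN_STACK };

    struct Node {
        std::vector<int> out;   /* indices of successor nodes */
        Tag tag = NOT_VISITED;
        size_t resume = 0;      /* next outgoing edge to look at ("done" above) */
    };

    /* Iterative DFS that reports whether any back edge (cycle) exists. */
    static bool detect_cycle(std::vector<Node> &nodes)
    {
        bool found = false;
        for (size_t root = 0; root < nodes.size(); ++root) {
            if (nodes[root].tag != NOT_VISITED) {
                continue;
            }
            std::stack<size_t> traversal;
            traversal.push(root);
            nodes[root].tag = IN_STACK;
            while (!traversal.empty()) {
                Node &node = nodes[traversal.top()];
                bool descended = false;
                for (size_t i = node.resume; i < node.out.size(); ++i) {
                    Node &to = nodes[node.out[i]];
                    if (to.tag == IN_STACK) {
                        found = true;          /* back edge closes a cycle */
                    }
                    else if (to.tag == NOT_VISITED) {
                        to.tag = IN_STACK;
                        node.resume = i;       /* resume here once the child is done */
                        traversal.push(node.out[i]);
                        descended = true;
                        break;
                    }
                }
                if (!descended) {
                    node.tag = VISITED;
                    traversal.pop();
                }
            }
        }
        return found;
    }

    int main()
    {
        std::vector<Node> graph(3);
        graph[0].out = {1};
        graph[1].out = {2};
        graph[2].out = {0};  /* 2 -> 0 closes the cycle */
        std::printf("cycle detected: %s\n", detect_cycle(graph) ? "yes" : "no");
        return 0;
    }
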
diff --git a/source/blender/depsgraph/intern/builder/deg_builder_nodes.cc b/source/blender/depsgraph/intern/builder/deg_builder_nodes.cc
index 1812384..e312c4e 100644
--- a/source/blender/depsgraph/intern/builder/deg_builder_nodes.cc
+++ b/source/blender/depsgraph/intern/builder/deg_builder_nodes.cc
@@ -34,7 +34,6 @@
#include <stdio.h>
#include <stdlib.h>
-#include <string.h>
#include "MEM_guardedalloc.h"
@@ -56,8 +55,10 @@ extern "C" {
#include "DNA_key_types.h"
#include "DNA_lamp_types.h"
#include "DNA_material_types.h"
+#include "DNA_mask_types.h"
#include "DNA_mesh_types.h"
#include "DNA_meta_types.h"
+#include "DNA_movieclip_types.h"
#include "DNA_node_types.h"
#include "DNA_particle_types.h"
#include "DNA_object_types.h"
@@ -106,9 +107,44 @@ extern "C" {
#include "intern/nodes/deg_node_operation.h"
#include "intern/depsgraph_types.h"
#include "intern/depsgraph_intern.h"
+#include "util/deg_util_foreach.h"
namespace DEG {
+namespace {
+
+struct BuilderWalkUserData {
+ DepsgraphNodeBuilder *builder;
+ Scene *scene;
+};
+
+static void modifier_walk(void *user_data,
+ struct Object * /*ob*/,
+ struct Object **obpoin,
+ int /*cd_flag*/)
+{
+ BuilderWalkUserData *data = (BuilderWalkUserData *)user_data;
+ if (*obpoin) {
+ data->builder->build_object(data->scene, NULL, *obpoin);
+ }
+}
+
+void constraint_walk(bConstraint * /*con*/,
+ ID **idpoin,
+ bool /*is_reference*/,
+ void *user_data)
+{
+ BuilderWalkUserData *data = (BuilderWalkUserData *)user_data;
+ if (*idpoin) {
+ ID *id = *idpoin;
+ if (GS(id->name) == ID_OB) {
+ data->builder->build_object(data->scene, NULL, (Object *)id);
+ }
+ }
+}
+
+} /* namespace */
+
/* ************ */
/* Node Builder */
@@ -131,8 +167,7 @@ RootDepsNode *DepsgraphNodeBuilder::add_root_node()
IDDepsNode *DepsgraphNodeBuilder::add_id_node(ID *id)
{
- const char *idtype_name = BKE_idcode_to_name(GS(id->name));
- return m_graph->add_id_node(id, string(id->name + 2) + "[" + idtype_name + "]");
+ return m_graph->add_id_node(id, id->name);
}
TimeSourceDepsNode *DepsgraphNodeBuilder::add_time_source(ID *id)
@@ -179,7 +214,7 @@ TimeSourceDepsNode *DepsgraphNodeBuilder::add_time_source(ID *id)
ComponentDepsNode *DepsgraphNodeBuilder::add_component_node(
ID *id,
eDepsNode_Type comp_type,
- const string &comp_name)
+ const char *comp_name)
{
IDDepsNode *id_node = add_id_node(id);
ComponentDepsNode *comp_node = id_node->add_component(comp_type, comp_name);
@@ -192,15 +227,19 @@ OperationDepsNode *DepsgraphNodeBuilder::add_operation_node(
eDepsOperation_Type optype,
DepsEvalOperationCb op,
eDepsOperation_Code opcode,
- const string &description)
+ const char *name,
+ int name_tag)
{
- OperationDepsNode *op_node = comp_node->has_operation(opcode, description);
+ OperationDepsNode *op_node = comp_node->has_operation(opcode,
+ name,
+ name_tag);
if (op_node == NULL) {
- op_node = comp_node->add_operation(optype, op, opcode, description);
+ op_node = comp_node->add_operation(optype, op, opcode, name, name_tag);
m_graph->operations.push_back(op_node);
}
else {
- fprintf(stderr, "add_operation: Operation already exists - %s has %s at %p\n",
+ fprintf(stderr,
+ "add_operation: Operation already exists - %s has %s at %p\n",
comp_node->identifier().c_str(),
op_node->identifier().c_str(),
op_node);
@@ -212,14 +251,15 @@ OperationDepsNode *DepsgraphNodeBuilder::add_operation_node(
OperationDepsNode *DepsgraphNodeBuilder::add_operation_node(
ID *id,
eDepsNode_Type comp_type,
- const string &comp_name,
+ const char *comp_name,
eDepsOperation_Type optype,
DepsEvalOperationCb op,
eDepsOperation_Code opcode,
- const string &description)
+ const char *name,
+ int name_tag)
{
ComponentDepsNode *comp_node = add_component_node(id, comp_type, comp_name);
- return add_operation_node(comp_node, optype, op, opcode, description);
+ return add_operation_node(comp_node, optype, op, opcode, name, name_tag);
}
OperationDepsNode *DepsgraphNodeBuilder::add_operation_node(
@@ -228,128 +268,58 @@ OperationDepsNode *DepsgraphNodeBuilder::add_operation_node(
eDepsOperation_Type optype,
DepsEvalOperationCb op,
eDepsOperation_Code opcode,
- const string& description)
+ const char *name,
+ int name_tag)
{
- return add_operation_node(id, comp_type, "", optype, op, opcode, description);
+ return add_operation_node(id,
+ comp_type,
+ "",
+ optype,
+ op,
+ opcode,
+ name,
+ name_tag);
}
bool DepsgraphNodeBuilder::has_operation_node(ID *id,
eDepsNode_Type comp_type,
- const string &comp_name,
+ const char *comp_name,
eDepsOperation_Code opcode,
- const string &description)
+ const char *name,
+ int name_tag)
{
- return find_operation_node(id, comp_type, comp_name, opcode, description) != NULL;
+ return find_operation_node(id,
+ comp_type,
+ comp_name,
+ opcode,
+ name,
+ name_tag) != NULL;
}
OperationDepsNode *DepsgraphNodeBuilder::find_operation_node(
ID *id,
eDepsNode_Type comp_type,
- const string &comp_name,
+ const char *comp_name,
eDepsOperation_Code opcode,
- const string &description)
+ const char *name,
+ int name_tag)
{
ComponentDepsNode *comp_node = add_component_node(id, comp_type, comp_name);
- return comp_node->has_operation(opcode, description);
+ return comp_node->has_operation(opcode, name, name_tag);
}
OperationDepsNode *DepsgraphNodeBuilder::find_operation_node(
ID *id,
eDepsNode_Type comp_type,
eDepsOperation_Code opcode,
- const string& description)
+ const char *name,
+ int name_tag)
{
- return find_operation_node(id, comp_type, "", opcode, description);
+ return find_operation_node(id, comp_type, "", opcode, name, name_tag);
}
/* **** Build functions for entity nodes **** */
-void DepsgraphNodeBuilder::build_scene(Main *bmain, Scene *scene)
-{
- /* LIB_TAG_DOIT is used to indicate whether node for given ID was already
- * created or not. This flag is being set in add_id_node(), so functions
- * shouldn't bother with setting it, they only might query this flag when
- * needed.
- */
- BKE_main_id_tag_all(bmain, LIB_TAG_DOIT, false);
- /* XXX nested node trees are not included in tag-clearing above,
- * so we need to do this manually.
- */
- FOREACH_NODETREE(bmain, nodetree, id) {
- if (id != (ID *)nodetree)
- nodetree->id.tag &= ~LIB_TAG_DOIT;
- } FOREACH_NODETREE_END
-
- /* scene ID block */
- add_id_node(&scene->id);
-
- /* timesource */
- add_time_source(NULL);
-
- /* build subgraph for set, and link this in... */
- // XXX: depending on how this goes, that scene itself could probably store its
- // own little partial depsgraph?
- if (scene->set) {
- build_scene(bmain, scene->set);
- }
-
- /* scene objects */
- for (Base *base = (Base *)scene->base.first; base; base = base->next) {
- Object *ob = base->object;
-
- /* object itself */
- build_object(scene, base, ob);
-
- /* object that this is a proxy for */
- // XXX: the way that proxies work needs to be completely reviewed!
- if (ob->proxy) {
- ob->proxy->proxy_from = ob;
- build_object(scene, base, ob->proxy);
- }
-
- /* Object dupligroup. */
- if (ob->dup_group) {
- build_group(scene, base, ob->dup_group);
- }
- }
-
- /* rigidbody */
- if (scene->rigidbody_world) {
- build_rigidbody(scene);
- }
-
- /* scene's animation and drivers */
- if (scene->adt) {
- build_animdata(&scene->id);
- }
-
- /* world */
- if (scene->world) {
- build_world(scene->world);
- }
-
- /* compo nodes */
- if (scene->nodetree) {
- build_compositor(scene);
- }
-
- /* sequencer */
- // XXX...
-
- /* grease pencil */
- if (scene->gpd) {
- build_gpencil(scene->gpd);
- }
-
- /* cache files */
- for (CacheFile *cachefile = static_cast<CacheFile *>(bmain->cachefiles.first);
- cachefile;
- cachefile = static_cast<CacheFile *>(cachefile->id.next))
- {
- build_cachefile(cachefile);
- }
-}
-
void DepsgraphNodeBuilder::build_group(Scene *scene,
Base *base,
Group *group)
@@ -360,10 +330,7 @@ void DepsgraphNodeBuilder::build_group(Scene *scene,
}
group_id->tag |= LIB_TAG_DOIT;
- for (GroupObject *go = (GroupObject *)group->gobject.first;
- go != NULL;
- go = go->next)
- {
+ LINKLIST_FOREACH (GroupObject *, go, &group->gobject) {
build_object(scene, base, go->ob);
}
}
@@ -380,45 +347,74 @@ SubgraphDepsNode *DepsgraphNodeBuilder::build_subgraph(Group *group)
DepsgraphNodeBuilder subgraph_builder(m_bmain, subgraph);
/* add group objects */
- for (GroupObject *go = (GroupObject *)group->gobject.first;
- go != NULL;
- go = go->next)
- {
+ LINKLIST_FOREACH (GroupObject *, go, &group->gobject) {
/*Object *ob = go->ob;*/
- /* Each "group object" is effectively a separate instance of the underlying
- * object data. When the group is evaluated, the transform results and/or
- * some other attributes end up getting overridden by the group
+ /* Each "group object" is effectively a separate instance of the
+ * underlying object data. When the group is evaluated, the transform
+ * results and/or some other attributes end up getting overridden by
+ * the group.
*/
}
- /* create a node for representing subgraph */
+ /* Create a node for representing subgraph. */
SubgraphDepsNode *subgraph_node = m_graph->add_subgraph_node(&group->id);
subgraph_node->graph = subgraph;
- /* make a copy of the data this node will need? */
- // XXX: do we do this now, or later?
- // TODO: need API function which queries graph's ID's hash, and duplicates those blocks thoroughly with all outside links removed...
+ /* Make a copy of the data this node will need? */
+ /* XXX: do we do this now, or later? */
+ /* TODO: need API function which queries graph's ID's hash, and duplicates
+ * those blocks thoroughly with all outside links removed.
+ */
return subgraph_node;
}
void DepsgraphNodeBuilder::build_object(Scene *scene, Base *base, Object *ob)
{
- if (ob->id.tag & LIB_TAG_DOIT) {
- IDDepsNode *id_node = m_graph->find_id_node(&ob->id);
+ const bool has_object = (ob->id.tag & LIB_TAG_DOIT);
+ IDDepsNode *id_node = (has_object)
+ ? m_graph->find_id_node(&ob->id)
+ : add_id_node(&ob->id);
+ /* Update node layers.
+ * Do it for both new and existing ID nodes, because several bases
+ * might be sharing the same object.
+ */
+ if (base != NULL) {
id_node->layers |= base->lay;
+ }
+ if (ob == scene->camera) {
+ /* Camera should always be updated, it is used directly by the viewport. */
+ id_node->layers |= (unsigned int)(-1);
+ }
+ /* Skip rest of components if the ID node was already there. */
+ if (has_object) {
return;
}
-
- IDDepsNode *id_node = add_id_node(&ob->id);
- id_node->layers |= base->lay;
+ ob->id.tag |= LIB_TAG_DOIT;
ob->customdata_mask = 0;
- /* standard components */
+ /* Standard components. */
build_object_transform(scene, ob);
- /* object data */
+ if (ob->parent != NULL) {
+ build_object(scene, NULL, ob->parent);
+ }
+ if (ob->modifiers.first != NULL) {
+ BuilderWalkUserData data;
+ data.builder = this;
+ data.scene = scene;
+ modifiers_foreachObjectLink(ob, modifier_walk, &data);
+ }
+ if (ob->constraints.first != NULL) {
+ BuilderWalkUserData data;
+ data.builder = this;
+ data.scene = scene;
+ modifiers_foreachObjectLink(ob, modifier_walk, &data);
+ BKE_constraints_id_loop(&ob->constraints, constraint_walk, &data);
+ }
+
+ /* Object data. */
if (ob->data) {
/* type-specific data... */
switch (ob->type) {
@@ -428,15 +424,6 @@ void DepsgraphNodeBuilder::build_object(Scene *scene, Base *base, Object *ob)
case OB_SURF:
case OB_MBALL:
case OB_LATTICE:
- {
- /* TODO(sergey): This way using this object's
- * properties as driver target works fine.
- *
- * Does this depend on other nodes?
- */
- add_operation_node(&ob->id, DEPSNODE_TYPE_PARAMETERS, DEPSOP_TYPE_POST, NULL,
- DEG_OPCODE_PLACEHOLDER, "Parameters Eval");
-
build_obdata_geom(scene, ob);
/* TODO(sergey): Only for until we support granular
* update of curves.
@@ -448,7 +435,6 @@ void DepsgraphNodeBuilder::build_object(Scene *scene, Base *base, Object *ob)
}
}
break;
- }
case OB_ARMATURE: /* Pose */
if (ID_IS_LINKED_DATABLOCK(ob) && ob->proxy_from != NULL) {
@@ -558,14 +544,6 @@ void DepsgraphNodeBuilder::build_object_constraints(Scene *scene, Object *ob)
DEG_OPCODE_TRANSFORM_CONSTRAINTS);
}
-void DepsgraphNodeBuilder::build_pose_constraints(Object *ob, bPoseChannel *pchan)
-{
- /* create node for constraint stack */
- add_operation_node(&ob->id, DEPSNODE_TYPE_BONE, pchan->name,
- DEPSOP_TYPE_EXEC, function_bind(BKE_pose_constraints_evaluate, _1, ob, pchan),
- DEG_OPCODE_BONE_CONSTRAINTS);
-}
-
/**
* Build graph nodes for AnimData block
* \param id: ID-Block which hosts the AnimData
@@ -593,7 +571,7 @@ void DepsgraphNodeBuilder::build_animdata(ID *id)
}
/* drivers */
- for (FCurve *fcu = (FCurve *)adt->drivers.first; fcu; fcu = fcu->next) {
+ LINKLIST_FOREACH (FCurve *, fcu, &adt->drivers) {
/* create driver */
build_driver(id, fcu);
}
@@ -617,12 +595,17 @@ OperationDepsNode *DepsgraphNodeBuilder::build_driver(ID *id, FCurve *fcu)
OperationDepsNode *driver_op = find_operation_node(id,
DEPSNODE_TYPE_PARAMETERS,
DEG_OPCODE_DRIVER,
- deg_fcurve_id_name(fcu));
+ fcu->rna_path,
+ fcu->array_index);
if (driver_op == NULL) {
- driver_op = add_operation_node(id, DEPSNODE_TYPE_PARAMETERS,
- DEPSOP_TYPE_EXEC, function_bind(BKE_animsys_eval_driver, _1, id, fcu),
- DEG_OPCODE_DRIVER, deg_fcurve_id_name(fcu));
+ driver_op = add_operation_node(id,
+ DEPSNODE_TYPE_PARAMETERS,
+ DEPSOP_TYPE_EXEC,
+ function_bind(BKE_animsys_eval_driver, _1, id, fcu),
+ DEG_OPCODE_DRIVER,
+ fcu->rna_path,
+ fcu->array_index);
}
/* tag "scripted expression" drivers as needing Python (due to GIL issues, etc.) */
@@ -701,7 +684,7 @@ void DepsgraphNodeBuilder::build_rigidbody(Scene *scene)
/* objects - simulation participants */
if (rbw->group) {
- for (GroupObject *go = (GroupObject *)rbw->group->gobject.first; go; go = go->next) {
+ LINKLIST_FOREACH (GroupObject *, go, &rbw->group->gobject) {
Object *ob = go->ob;
if (!ob || (ob->type != OB_MESH))
@@ -737,7 +720,7 @@ void DepsgraphNodeBuilder::build_particles(Scene *scene, Object *ob)
ComponentDepsNode *psys_comp = add_component_node(&ob->id, DEPSNODE_TYPE_EVAL_PARTICLES);
/* particle systems */
- for (ParticleSystem *psys = (ParticleSystem *)ob->particlesystem.first; psys; psys = psys->next) {
+ LINKLIST_FOREACH (ParticleSystem *, psys, &ob->particlesystem) {
ParticleSettings *part = psys->part;
/* particle settings */
@@ -747,7 +730,11 @@ void DepsgraphNodeBuilder::build_particles(Scene *scene, Object *ob)
/* this particle system */
// TODO: for now, this will just be a placeholder "ubereval" node
add_operation_node(psys_comp,
- DEPSOP_TYPE_EXEC, function_bind(BKE_particle_system_eval, _1, scene, ob, psys),
+ DEPSOP_TYPE_EXEC, function_bind(BKE_particle_system_eval,
+ _1,
+ scene,
+ ob,
+ psys),
DEG_OPCODE_PSYS_EVAL,
psys->name);
}
@@ -756,207 +743,6 @@ void DepsgraphNodeBuilder::build_particles(Scene *scene, Object *ob)
// TODO...
}
-/* IK Solver Eval Steps */
-void DepsgraphNodeBuilder::build_ik_pose(Scene *scene, Object *ob, bPoseChannel *pchan, bConstraint *con)
-{
- bKinematicConstraint *data = (bKinematicConstraint *)con->data;
-
- /* Find the chain's root. */
- bPoseChannel *rootchan = BKE_armature_ik_solver_find_root(pchan, data);
-
- if (has_operation_node(&ob->id, DEPSNODE_TYPE_EVAL_POSE, rootchan->name,
- DEG_OPCODE_POSE_IK_SOLVER))
- {
- return;
- }
-
- /* Operation node for evaluating/running IK Solver. */
- add_operation_node(&ob->id, DEPSNODE_TYPE_EVAL_POSE, rootchan->name,
- DEPSOP_TYPE_SIM, function_bind(BKE_pose_iktree_evaluate, _1, scene, ob, rootchan),
- DEG_OPCODE_POSE_IK_SOLVER);
-}
-
-/* Spline IK Eval Steps */
-void DepsgraphNodeBuilder::build_splineik_pose(Scene *scene, Object *ob, bPoseChannel *pchan, bConstraint *con)
-{
- bSplineIKConstraint *data = (bSplineIKConstraint *)con->data;
-
- /* Find the chain's root. */
- bPoseChannel *rootchan = BKE_armature_splineik_solver_find_root(pchan, data);
-
- /* Operation node for evaluating/running Spline IK Solver.
- * Store the "root bone" of this chain in the solver, so it knows where to start.
- */
- add_operation_node(&ob->id, DEPSNODE_TYPE_EVAL_POSE, rootchan->name,
- DEPSOP_TYPE_SIM, function_bind(BKE_pose_splineik_evaluate, _1, scene, ob, rootchan),
- DEG_OPCODE_POSE_SPLINE_IK_SOLVER);
-}
-
-/* Pose/Armature Bones Graph */
-void DepsgraphNodeBuilder::build_rig(Scene *scene, Object *ob)
-{
- bArmature *arm = (bArmature *)ob->data;
-
- /* animation and/or drivers linking posebones to base-armature used to define them
- * NOTE: AnimData here is really used to control animated deform properties,
- * which ideally should be able to be unique across different instances.
- * Eventually, we need some type of proxy/isolation mechanism in-between here
- * to ensure that we can use same rig multiple times in same scene...
- */
- build_animdata(&arm->id);
-
- /* Rebuild pose if not up to date. */
- if (ob->pose == NULL || (ob->pose->flag & POSE_RECALC)) {
- BKE_pose_rebuild(ob, arm);
- /* XXX: Without this animation gets lost in certain circumstances
- * after loading file. Need to investigate further since it does
- * not happen with simple scenes..
- */
- if (ob->adt) {
- ob->adt->recalc |= ADT_RECALC_ANIM;
- }
- }
-
- /* speed optimization for animation lookups */
- if (ob->pose) {
- BKE_pose_channels_hash_make(ob->pose);
- if (ob->pose->flag & POSE_CONSTRAINTS_NEED_UPDATE_FLAGS) {
- BKE_pose_update_constraint_flags(ob->pose);
- }
- }
-
- /* Make sure pose is up-to-date with armature updates. */
- add_operation_node(&arm->id,
- DEPSNODE_TYPE_PARAMETERS,
- DEPSOP_TYPE_EXEC,
- NULL,
- DEG_OPCODE_PLACEHOLDER,
- "Armature Eval");
-
- /**
- * Pose Rig Graph
- * ==============
- *
- * Pose Component:
- * - Mainly used for referencing Bone components.
- * - This is where the evaluation operations for init/exec/cleanup
- * (ik) solvers live, and are later hooked up (so that they can be
- * interleaved during runtime) with bone-operations they depend on/affect.
- * - init_pose_eval() and cleanup_pose_eval() are absolute first and last
- * steps of pose eval process. ALL bone operations must be performed
- * between these two...
- *
- * Bone Component:
- * - Used for representing each bone within the rig
- * - Acts to encapsulate the evaluation operations (base matrix + parenting,
- * and constraint stack) so that they can be easily found.
- * - Everything else which depends on bone-results hook up to the component only
- * so that we can redirect those to point at either the the post-IK/
- * post-constraint/post-matrix steps, as needed.
- */
-
- /* pose eval context */
- add_operation_node(&ob->id, DEPSNODE_TYPE_EVAL_POSE,
- DEPSOP_TYPE_INIT, function_bind(BKE_pose_eval_init, _1, scene, ob, ob->pose), DEG_OPCODE_POSE_INIT);
-
- add_operation_node(&ob->id, DEPSNODE_TYPE_EVAL_POSE,
- DEPSOP_TYPE_POST, function_bind(BKE_pose_eval_flush, _1, scene, ob, ob->pose), DEG_OPCODE_POSE_DONE);
-
- /* bones */
- for (bPoseChannel *pchan = (bPoseChannel *)ob->pose->chanbase.first; pchan; pchan = pchan->next) {
- /* node for bone eval */
- add_operation_node(&ob->id, DEPSNODE_TYPE_BONE, pchan->name,
- DEPSOP_TYPE_INIT, NULL, // XXX: BKE_pose_eval_bone_local
- DEG_OPCODE_BONE_LOCAL);
-
- add_operation_node(&ob->id, DEPSNODE_TYPE_BONE, pchan->name,
- DEPSOP_TYPE_EXEC, function_bind(BKE_pose_eval_bone, _1, scene, ob, pchan), // XXX: BKE_pose_eval_bone_pose
- DEG_OPCODE_BONE_POSE_PARENT);
-
- add_operation_node(&ob->id, DEPSNODE_TYPE_BONE, pchan->name,
- DEPSOP_TYPE_OUT, NULL, /* NOTE: dedicated noop for easier relationship construction */
- DEG_OPCODE_BONE_READY);
-
- add_operation_node(&ob->id, DEPSNODE_TYPE_BONE, pchan->name,
- DEPSOP_TYPE_POST, function_bind(BKE_pose_bone_done, _1, pchan),
- DEG_OPCODE_BONE_DONE);
-
- /* constraints */
- if (pchan->constraints.first != NULL) {
- build_pose_constraints(ob, pchan);
- }
-
- /**
- * IK Solvers...
- *
- * - These require separate processing steps are pose-level
- * to be executed between chains of bones (i.e. once the
- * base transforms of a bunch of bones is done)
- *
- * Unsolved Issues:
- * - Care is needed to ensure that multi-headed trees work out the same as in ik-tree building
- * - Animated chain-lengths are a problem...
- */
- for (bConstraint *con = (bConstraint *)pchan->constraints.first; con; con = con->next) {
- switch (con->type) {
- case CONSTRAINT_TYPE_KINEMATIC:
- build_ik_pose(scene, ob, pchan, con);
- break;
-
- case CONSTRAINT_TYPE_SPLINEIK:
- build_splineik_pose(scene, ob, pchan, con);
- break;
-
- default:
- break;
- }
- }
- }
-}
-
-void DepsgraphNodeBuilder::build_proxy_rig(Object *ob)
-{
- ID *obdata = (ID *)ob->data;
- build_animdata(obdata);
-
- BLI_assert(ob->pose != NULL);
-
- /* speed optimization for animation lookups */
- BKE_pose_channels_hash_make(ob->pose);
- if (ob->pose->flag & POSE_CONSTRAINTS_NEED_UPDATE_FLAGS) {
- BKE_pose_update_constraint_flags(ob->pose);
- }
-
- add_operation_node(&ob->id,
- DEPSNODE_TYPE_EVAL_POSE,
- DEPSOP_TYPE_INIT,
- function_bind(BKE_pose_eval_proxy_copy, _1, ob),
- DEG_OPCODE_POSE_INIT);
-
- for (bPoseChannel *pchan = (bPoseChannel *)ob->pose->chanbase.first;
- pchan != NULL;
- pchan = pchan->next)
- {
- add_operation_node(&ob->id, DEPSNODE_TYPE_BONE, pchan->name,
- DEPSOP_TYPE_INIT, NULL,
- DEG_OPCODE_BONE_LOCAL);
-
- add_operation_node(&ob->id, DEPSNODE_TYPE_BONE, pchan->name,
- DEPSOP_TYPE_EXEC, NULL,
- DEG_OPCODE_BONE_READY);
-
- add_operation_node(&ob->id, DEPSNODE_TYPE_BONE, pchan->name,
- DEPSOP_TYPE_POST, NULL,
- DEG_OPCODE_BONE_DONE);
- }
-
- add_operation_node(&ob->id,
- DEPSNODE_TYPE_EVAL_POSE,
- DEPSOP_TYPE_POST,
- NULL,
- DEG_OPCODE_POSE_DONE);
-}
-
/* Shapekeys */
void DepsgraphNodeBuilder::build_shapekeys(Key *key)
{
@@ -972,6 +758,18 @@ void DepsgraphNodeBuilder::build_obdata_geom(Scene *scene, Object *ob)
{
ID *obdata = (ID *)ob->data;
+ /* TODO(sergey): This way using this object's properties as driver target
+ * works fine.
+ *
+ * Does this depend on other nodes?
+ */
+ add_operation_node(&ob->id,
+ DEPSNODE_TYPE_PARAMETERS,
+ DEPSOP_TYPE_POST,
+ NULL,
+ DEG_OPCODE_PLACEHOLDER,
+ "Parameters Eval");
+
/* Temporary uber-update node, which does everything.
* It is for the time being, while we're porting old dependencies into the new system.
* We'll get rid of this node as soon as all the granular update functions
@@ -979,39 +777,42 @@ void DepsgraphNodeBuilder::build_obdata_geom(Scene *scene, Object *ob)
*
* TODO(sergey): Get rid of this node.
*/
- add_operation_node(&ob->id, DEPSNODE_TYPE_GEOMETRY,
- DEPSOP_TYPE_POST, function_bind(BKE_object_eval_uber_data, _1, scene, ob),
+ add_operation_node(&ob->id,
+ DEPSNODE_TYPE_GEOMETRY,
+ DEPSOP_TYPE_POST,
+ function_bind(BKE_object_eval_uber_data, _1, scene, ob),
DEG_OPCODE_GEOMETRY_UBEREVAL);
- add_operation_node(&ob->id, DEPSNODE_TYPE_GEOMETRY,
- DEPSOP_TYPE_INIT, NULL,
- DEG_OPCODE_PLACEHOLDER, "Eval Init");
+ add_operation_node(&ob->id,
+ DEPSNODE_TYPE_GEOMETRY,
+ DEPSOP_TYPE_INIT,
+ NULL,
+ DEG_OPCODE_PLACEHOLDER,
+ "Eval Init");
// TODO: "Done" operation
/* Modifiers */
- if (ob->modifiers.first) {
- ModifierData *md;
-
- for (md = (ModifierData *)ob->modifiers.first; md; md = md->next) {
- add_operation_node(&ob->id, DEPSNODE_TYPE_GEOMETRY,
- DEPSOP_TYPE_EXEC, function_bind(BKE_object_eval_modifier, _1, scene, ob, md),
- DEG_OPCODE_GEOMETRY_MODIFIER, md->name);
- }
+ LINKLIST_FOREACH (ModifierData *, md, &ob->modifiers) {
+ add_operation_node(&ob->id,
+ DEPSNODE_TYPE_GEOMETRY,
+ DEPSOP_TYPE_EXEC,
+ function_bind(BKE_object_eval_modifier,
+ _1,
+ scene,
+ ob,
+ md),
+ DEG_OPCODE_GEOMETRY_MODIFIER,
+ md->name);
}
/* materials */
- if (ob->totcol) {
- int a;
-
- for (a = 1; a <= ob->totcol; a++) {
- Material *ma = give_current_material(ob, a);
-
- if (ma) {
- // XXX?!
- ComponentDepsNode *geom_node = add_component_node(&ob->id, DEPSNODE_TYPE_GEOMETRY);
- build_material(geom_node, ma);
- }
+ for (int a = 1; a <= ob->totcol; a++) {
+ Material *ma = give_current_material(ob, a);
+ if (ma != NULL) {
+ // XXX?!
+ ComponentDepsNode *geom_node = add_component_node(&ob->id, DEPSNODE_TYPE_GEOMETRY);
+ build_material(geom_node, ma);
}
}
@@ -1032,16 +833,23 @@ void DepsgraphNodeBuilder::build_obdata_geom(Scene *scene, Object *ob)
build_animdata(obdata);
- /* nodes for result of obdata's evaluation, and geometry evaluation on object */
+ /* Nodes for result of obdata's evaluation, and geometry
+ * evaluation on object.
+ */
switch (ob->type) {
case OB_MESH:
{
//Mesh *me = (Mesh *)ob->data;
/* evaluation operations */
- add_operation_node(obdata, DEPSNODE_TYPE_GEOMETRY,
- DEPSOP_TYPE_INIT, function_bind(BKE_mesh_eval_geometry, _1, (Mesh *)obdata),
- DEG_OPCODE_PLACEHOLDER, "Geometry Eval");
+ add_operation_node(obdata,
+ DEPSNODE_TYPE_GEOMETRY,
+ DEPSOP_TYPE_INIT,
+ function_bind(BKE_mesh_eval_geometry,
+ _1,
+ (Mesh *)obdata),
+ DEG_OPCODE_PLACEHOLDER,
+ "Geometry Eval");
break;
}
@@ -1049,48 +857,76 @@ void DepsgraphNodeBuilder::build_obdata_geom(Scene *scene, Object *ob)
{
Object *mom = BKE_mball_basis_find(scene, ob);
- /* motherball - mom depends on children! */
+ /* Motherball - mom depends on children! */
if (mom == ob) {
/* metaball evaluation operations */
/* NOTE: only the motherball gets evaluated! */
- add_operation_node(obdata, DEPSNODE_TYPE_GEOMETRY,
- DEPSOP_TYPE_INIT, function_bind(BKE_mball_eval_geometry, _1, (MetaBall *)obdata),
- DEG_OPCODE_PLACEHOLDER, "Geometry Eval");
+ add_operation_node(obdata,
+ DEPSNODE_TYPE_GEOMETRY,
+ DEPSOP_TYPE_INIT,
+ function_bind(BKE_mball_eval_geometry,
+ _1,
+ (MetaBall *)obdata),
+ DEG_OPCODE_PLACEHOLDER,
+ "Geometry Eval");
}
break;
}
case OB_CURVE:
+ case OB_SURF:
case OB_FONT:
{
- /* curve evaluation operations */
+ /* Curve/nurbs evaluation operations. */
/* - calculate curve geometry (including path) */
- add_operation_node(obdata, DEPSNODE_TYPE_GEOMETRY,
- DEPSOP_TYPE_INIT, function_bind(BKE_curve_eval_geometry, _1, (Curve *)obdata),
- DEG_OPCODE_PLACEHOLDER, "Geometry Eval");
-
- /* - calculate curve path - this is used by constraints, etc. */
- add_operation_node(obdata, DEPSNODE_TYPE_GEOMETRY,
- DEPSOP_TYPE_EXEC, function_bind(BKE_curve_eval_path, _1, (Curve *)obdata),
- DEG_OPCODE_GEOMETRY_PATH, "Path");
- break;
- }
+ add_operation_node(obdata,
+ DEPSNODE_TYPE_GEOMETRY,
+ DEPSOP_TYPE_INIT,
+ function_bind(BKE_curve_eval_geometry,
+ _1,
+ (Curve *)obdata),
+ DEG_OPCODE_PLACEHOLDER,
+ "Geometry Eval");
+
+ /* Calculate curve path - this is used by constraints, etc. */
+ if (ELEM(ob->type, OB_CURVE, OB_FONT)) {
+ add_operation_node(obdata,
+ DEPSNODE_TYPE_GEOMETRY,
+ DEPSOP_TYPE_EXEC,
+ function_bind(BKE_curve_eval_path,
+ _1,
+ (Curve *)obdata),
+ DEG_OPCODE_GEOMETRY_PATH,
+ "Path");
+ }
- case OB_SURF: /* Nurbs Surface */
- {
- /* nurbs evaluation operations */
- add_operation_node(obdata, DEPSNODE_TYPE_GEOMETRY,
- DEPSOP_TYPE_INIT, function_bind(BKE_curve_eval_geometry, _1, (Curve *)obdata),
- DEG_OPCODE_PLACEHOLDER, "Geometry Eval");
+ /* Make sure objects used for bevel/taper are in the graph.
+ * NOTE: These objects might not be linked to the scene.
+ */
+ Curve *cu = (Curve *)obdata;
+ if (cu->bevobj != NULL) {
+ build_object(scene, NULL, cu->bevobj);
+ }
+ if (cu->taperobj != NULL) {
+ build_object(scene, NULL, cu->taperobj);
+ }
+ if (ob->type == OB_FONT && cu->textoncurve != NULL) {
+ build_object(scene, NULL, cu->textoncurve);
+ }
break;
}
- case OB_LATTICE: /* Lattice */
+ case OB_LATTICE:
{
- /* lattice evaluation operations */
- add_operation_node(obdata, DEPSNODE_TYPE_GEOMETRY,
- DEPSOP_TYPE_INIT, function_bind(BKE_lattice_eval_geometry, _1, (Lattice *)obdata),
- DEG_OPCODE_PLACEHOLDER, "Geometry Eval");
+ /* Lattice evaluation operations. */
+ add_operation_node(obdata,
+ DEPSNODE_TYPE_GEOMETRY,
+ DEPSOP_TYPE_INIT,
+ function_bind(BKE_lattice_eval_geometry,
+ _1,
+ (Lattice *)obdata),
+ DEG_OPCODE_PLACEHOLDER,
+ "Geometry Eval");
break;
}
}
@@ -1170,16 +1006,21 @@ void DepsgraphNodeBuilder::build_nodetree(DepsNode *owner_node, bNodeTree *ntree
DEG_OPCODE_PLACEHOLDER, "Parameters Eval");
/* nodetree's nodes... */
- for (bNode *bnode = (bNode *)ntree->nodes.first; bnode; bnode = bnode->next) {
- if (bnode->id) {
- if (GS(bnode->id->name) == ID_MA) {
- build_material(owner_node, (Material *)bnode->id);
+ LINKLIST_FOREACH (bNode *, bnode, &ntree->nodes) {
+ ID *id = bnode->id;
+ if (id != NULL) {
+ short id_type = GS(id->name);
+ if (id_type == ID_MA) {
+ build_material(owner_node, (Material *)id);
}
- else if (bnode->type == ID_TE) {
- build_texture(owner_node, (Tex *)bnode->id);
+ else if (id_type == ID_TE) {
+ build_texture(owner_node, (Tex *)id);
+ }
+ else if (id_type == ID_IM) {
+ build_image((Image *)id);
}
else if (bnode->type == NODE_GROUP) {
- bNodeTree *group_ntree = (bNodeTree *)bnode->id;
+ bNodeTree *group_ntree = (bNodeTree *)id;
if ((group_ntree->id.tag & LIB_TAG_DOIT) == 0) {
build_nodetree(owner_node, group_ntree);
}
@@ -1236,10 +1077,33 @@ void DepsgraphNodeBuilder::build_texture(DepsNode *owner_node, Tex *tex)
return;
}
tex_id->tag |= LIB_TAG_DOIT;
- /* texture itself */
+ /* Texture itself. */
build_animdata(tex_id);
- /* texture's nodetree */
+ /* Texture's nodetree. */
build_nodetree(owner_node, tex->nodetree);
+ /* Special cases for different IDs which texture uses. */
+ if (tex->type == TEX_IMAGE) {
+ if (tex->ima != NULL) {
+ build_image(tex->ima);
+ }
+ }
+}
+
+void DepsgraphNodeBuilder::build_image(Image *image)
+{
+ ID *image_id = &image->id;
+ if (image_id->tag & LIB_TAG_DOIT) {
+ return;
+ }
+ image_id->tag |= LIB_TAG_DOIT;
+ /* Image ID node itself. */
+ add_id_node(image_id);
+ /* Placeholder so we can add relations and tag ID node for update. */
+ add_operation_node(image_id,
+ DEPSNODE_TYPE_PARAMETERS,
+ DEPSOP_TYPE_EXEC,
+ NULL,
+ DEG_OPCODE_PLACEHOLDER,
+ "Image Eval");
}
void DepsgraphNodeBuilder::build_compositor(Scene *scene)
@@ -1273,7 +1137,6 @@ void DepsgraphNodeBuilder::build_cachefile(CacheFile *cache_file)
ID *cache_file_id = &cache_file->id;
add_component_node(cache_file_id, DEPSNODE_TYPE_CACHE);
-
add_operation_node(cache_file_id, DEPSNODE_TYPE_CACHE,
DEPSOP_TYPE_EXEC, NULL,
DEG_OPCODE_PLACEHOLDER, "Cache File Update");
@@ -1282,4 +1145,17 @@ void DepsgraphNodeBuilder::build_cachefile(CacheFile *cache_file)
build_animdata(cache_file_id);
}
+void DepsgraphNodeBuilder::build_mask(Mask *mask)
+{
+ ID *mask_id = &mask->id;
+ add_id_node(mask_id);
+ build_animdata(mask_id);
+}
+
+void DepsgraphNodeBuilder::build_movieclip(MovieClip *clip)
+{
+ ID *clip_id = &clip->id;
+ add_id_node(clip_id);
+ build_animdata(clip_id);
+}
+
} // namespace DEG
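
build_object() now pulls in indirectly referenced objects (parents, modifier targets, constraint targets) by walking them through callbacks, so they get ID nodes even when no visible base references them. Below is a hedged sketch of that callback-walk pattern with made-up minimal types; the real code goes through modifiers_foreachObjectLink() and BKE_constraints_id_loop():

    #include <cstdio>
    #include <vector>

    /* Hypothetical stand-ins; the real builder works on Blender's Object,
     * Scene and the BKE foreach callbacks. */
    struct Object {
        const char *name;
        std::vector<Object *> links;  /* objects referenced by modifiers/constraints */
    };

    struct Builder {
        void build_object(Object *ob) { std::printf("build %s\n", ob->name); }
    };

    struct WalkUserData {
        Builder *builder;
    };

    typedef void (*ObjectWalkFunc)(void *user_data, Object *linked);

    /* Generic "for each linked object" walker that calls back into user code,
     * in the spirit of modifiers_foreachObjectLink(). */
    static void foreach_object_link(Object *ob, ObjectWalkFunc walk, void *user_data)
    {
        for (Object *linked : ob->links) {
            if (linked != nullptr) {
                walk(user_data, linked);
            }
        }
    }

    static void object_walk(void *user_data, Object *linked)
    {
        WalkUserData *data = static_cast<WalkUserData *>(user_data);
        data->builder->build_object(linked);
    }

    int main()
    {
        Object target = {"Target", {}};
        Object ob = {"Cube", {&target}};
        Builder builder;
        WalkUserData data = {&builder};
        foreach_object_link(&ob, object_walk, &data);
        return 0;
    }
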
diff --git a/source/blender/depsgraph/intern/builder/deg_builder_nodes.h b/source/blender/depsgraph/intern/builder/deg_builder_nodes.h
index f378f07..9cb8bc5 100644
--- a/source/blender/depsgraph/intern/builder/deg_builder_nodes.h
+++ b/source/blender/depsgraph/intern/builder/deg_builder_nodes.h
@@ -38,12 +38,15 @@ struct bGPdata;
struct ListBase;
struct GHash;
struct ID;
+struct Image;
struct FCurve;
struct Group;
struct Key;
struct Main;
struct Material;
+struct Mask;
struct MTex;
+struct MovieClip;
struct bNodeTree;
struct Object;
struct bPoseChannel;
@@ -75,43 +78,49 @@ struct DepsgraphNodeBuilder {
ComponentDepsNode *add_component_node(ID *id,
eDepsNode_Type comp_type,
- const string& comp_name = "");
+ const char *comp_name = "");
OperationDepsNode *add_operation_node(ComponentDepsNode *comp_node,
eDepsOperation_Type optype,
DepsEvalOperationCb op,
eDepsOperation_Code opcode,
- const string& description = "");
+ const char *name = "",
+ int name_tag = -1);
OperationDepsNode *add_operation_node(ID *id,
eDepsNode_Type comp_type,
- const string& comp_name,
+ const char *comp_name,
eDepsOperation_Type optype,
DepsEvalOperationCb op,
eDepsOperation_Code opcode,
- const string& description = "");
+ const char *name = "",
+ int name_tag = -1);
OperationDepsNode *add_operation_node(ID *id,
eDepsNode_Type comp_type,
eDepsOperation_Type optype,
DepsEvalOperationCb op,
eDepsOperation_Code opcode,
- const string& description = "");
+ const char *name = "",
+ int name_tag = -1);
bool has_operation_node(ID *id,
eDepsNode_Type comp_type,
- const string& comp_name,
+ const char *comp_name,
eDepsOperation_Code opcode,
- const string& description = "");
+ const char *name = "",
+ int name_tag = -1);
OperationDepsNode *find_operation_node(ID *id,
eDepsNode_Type comp_type,
- const string &comp_name,
+ const char *comp_name,
eDepsOperation_Code opcode,
- const string &description = "");
+ const char *name = "",
+ int name_tag = -1);
OperationDepsNode *find_operation_node(ID *id,
eDepsNode_Type comp_type,
eDepsOperation_Code opcode,
- const string &description = "");
+ const char *name = "",
+ int name_tag = -1);
void build_scene(Main *bmain, Scene *scene);
SubgraphDepsNode *build_subgraph(Group *group);
@@ -142,10 +151,13 @@ struct DepsgraphNodeBuilder {
void build_material(DepsNode *owner_node, Material *ma);
void build_texture(DepsNode *owner_node, Tex *tex);
void build_texture_stack(DepsNode *owner_node, MTex **texture_stack);
+ void build_image(Image *image);
void build_world(World *world);
void build_compositor(Scene *scene);
void build_gpencil(bGPdata *gpd);
void build_cachefile(CacheFile *cache_file);
+ void build_mask(Mask *mask);
+ void build_movieclip(MovieClip *clip);
protected:
Main *m_bmain;
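
The header changes above swap the std::string descriptions for a (const char *name, int name_tag) pair with name_tag defaulting to -1, which lets drivers be identified by RNA path plus array index without building temporary strings. A small sketch of such a composite key, assuming a straightforward equality rule (this is not the actual OperationDepsNode API):

    #include <cstdio>
    #include <cstring>

    /* Hypothetical operation key: opcode plus an optional (name, name_tag)
     * pair; name_tag == -1 means "no tag", matching the new default above. */
    struct OperationKey {
        int opcode;
        const char *name;
        int name_tag;
    };

    static bool operation_key_equal(const OperationKey &a, const OperationKey &b)
    {
        return a.opcode == b.opcode &&
               a.name_tag == b.name_tag &&
               std::strcmp(a.name, b.name) == 0;
    }

    int main()
    {
        /* Two drivers on the same RNA path but different array indices are
         * distinct operations. */
        OperationKey x = {1, "location", 0};
        OperationKey y = {1, "location", 1};
        std::printf("%s\n", operation_key_equal(x, y) ? "same" : "different");
        return 0;
    }
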
diff --git a/source/blender/depsgraph/intern/builder/deg_builder_nodes_rig.cc b/source/blender/depsgraph/intern/builder/deg_builder_nodes_rig.cc
new file mode 100644
index 0000000..4a5f3dc
--- /dev/null
+++ b/source/blender/depsgraph/intern/builder/deg_builder_nodes_rig.cc
@@ -0,0 +1,273 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2013 Blender Foundation.
+ * All rights reserved.
+ *
+ * Original Author: Joshua Leung
+ * Contributor(s): Based on original depsgraph.c code - Blender Foundation (2005-2013)
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/depsgraph/intern/builder/deg_builder_nodes_rig.cc
+ * \ingroup depsgraph
+ *
+ * Methods for constructing depsgraph's nodes
+ */
+
+#include "intern/builder/deg_builder_nodes.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "MEM_guardedalloc.h"
+
+extern "C" {
+#include "BLI_blenlib.h"
+#include "BLI_string.h"
+#include "BLI_utildefines.h"
+
+#include "DNA_anim_types.h"
+#include "DNA_armature_types.h"
+#include "DNA_constraint_types.h"
+#include "DNA_object_types.h"
+
+#include "BKE_action.h"
+#include "BKE_armature.h"
+
+#include "DEG_depsgraph.h"
+#include "DEG_depsgraph_build.h"
+} /* extern "C" */
+
+#include "intern/builder/deg_builder.h"
+#include "intern/nodes/deg_node.h"
+#include "intern/nodes/deg_node_component.h"
+#include "intern/nodes/deg_node_operation.h"
+#include "intern/depsgraph_types.h"
+#include "intern/depsgraph_intern.h"
+#include "util/deg_util_foreach.h"
+
+namespace DEG {
+
+void DepsgraphNodeBuilder::build_pose_constraints(Object *ob, bPoseChannel *pchan)
+{
+ /* create node for constraint stack */
+ add_operation_node(&ob->id, DEPSNODE_TYPE_BONE, pchan->name,
+ DEPSOP_TYPE_EXEC, function_bind(BKE_pose_constraints_evaluate, _1, ob, pchan),
+ DEG_OPCODE_BONE_CONSTRAINTS);
+}
+
+/* IK Solver Eval Steps */
+void DepsgraphNodeBuilder::build_ik_pose(Scene *scene, Object *ob, bPoseChannel *pchan, bConstraint *con)
+{
+ bKinematicConstraint *data = (bKinematicConstraint *)con->data;
+
+ /* Find the chain's root. */
+ bPoseChannel *rootchan = BKE_armature_ik_solver_find_root(pchan, data);
+
+ if (has_operation_node(&ob->id, DEPSNODE_TYPE_EVAL_POSE, rootchan->name,
+ DEG_OPCODE_POSE_IK_SOLVER))
+ {
+ return;
+ }
+
+ /* Operation node for evaluating/running IK Solver. */
+ add_operation_node(&ob->id, DEPSNODE_TYPE_EVAL_POSE, rootchan->name,
+ DEPSOP_TYPE_SIM, function_bind(BKE_pose_iktree_evaluate, _1, scene, ob, rootchan),
+ DEG_OPCODE_POSE_IK_SOLVER);
+}
+
+/* Spline IK Eval Steps */
+void DepsgraphNodeBuilder::build_splineik_pose(Scene *scene, Object *ob, bPoseChannel *pchan, bConstraint *con)
+{
+ bSplineIKConstraint *data = (bSplineIKConstraint *)con->data;
+
+ /* Find the chain's root. */
+ bPoseChannel *rootchan = BKE_armature_splineik_solver_find_root(pchan, data);
+
+ /* Operation node for evaluating/running Spline IK Solver.
+ * Store the "root bone" of this chain in the solver, so it knows where to start.
+ */
+ add_operation_node(&ob->id, DEPSNODE_TYPE_EVAL_POSE, rootchan->name,
+ DEPSOP_TYPE_SIM, function_bind(BKE_pose_splineik_evaluate, _1, scene, ob, rootchan),
+ DEG_OPCODE_POSE_SPLINE_IK_SOLVER);
+}
+
+/* Pose/Armature Bones Graph */
+void DepsgraphNodeBuilder::build_rig(Scene *scene, Object *ob)
+{
+ bArmature *arm = (bArmature *)ob->data;
+
+ /* animation and/or drivers linking posebones to base-armature used to define them
+ * NOTE: AnimData here is really used to control animated deform properties,
+ * which ideally should be able to be unique across different instances.
+ * Eventually, we need some type of proxy/isolation mechanism in-between here
+ * to ensure that we can use same rig multiple times in same scene...
+ */
+ build_animdata(&arm->id);
+
+ /* Rebuild pose if not up to date. */
+ if (ob->pose == NULL || (ob->pose->flag & POSE_RECALC)) {
+ BKE_pose_rebuild_ex(ob, arm, false);
+ /* XXX: Without this animation gets lost in certain circumstances
+ * after loading file. Need to investigate further since it does
+ * not happen with simple scenes..
+ */
+ if (ob->adt) {
+ ob->adt->recalc |= ADT_RECALC_ANIM;
+ }
+ }
+
+ /* speed optimization for animation lookups */
+ if (ob->pose) {
+ BKE_pose_channels_hash_make(ob->pose);
+ if (ob->pose->flag & POSE_CONSTRAINTS_NEED_UPDATE_FLAGS) {
+ BKE_pose_update_constraint_flags(ob->pose);
+ }
+ }
+
+ /* Make sure pose is up-to-date with armature updates. */
+ add_operation_node(&arm->id,
+ DEPSNODE_TYPE_PARAMETERS,
+ DEPSOP_TYPE_EXEC,
+ NULL,
+ DEG_OPCODE_PLACEHOLDER,
+ "Armature Eval");
+
+ /**
+ * Pose Rig Graph
+ * ==============
+ *
+ * Pose Component:
+ * - Mainly used for referencing Bone components.
+ * - This is where the evaluation operations for init/exec/cleanup
+ * (ik) solvers live, and are later hooked up (so that they can be
+ * interleaved during runtime) with bone-operations they depend on/affect.
+ * - init_pose_eval() and cleanup_pose_eval() are absolute first and last
+ * steps of pose eval process. ALL bone operations must be performed
+ * between these two...
+ *
+ * Bone Component:
+ * - Used for representing each bone within the rig
+ * - Acts to encapsulate the evaluation operations (base matrix + parenting,
+ * and constraint stack) so that they can be easily found.
+ * - Everything else which depends on bone-results hook up to the component only
+ * so that we can redirect those to point at either the post-IK/
+ * post-constraint/post-matrix steps, as needed.
+ */
+
+ /* pose eval context */
+ add_operation_node(&ob->id, DEPSNODE_TYPE_EVAL_POSE,
+ DEPSOP_TYPE_INIT, function_bind(BKE_pose_eval_init, _1, scene, ob, ob->pose), DEG_OPCODE_POSE_INIT);
+
+ add_operation_node(&ob->id, DEPSNODE_TYPE_EVAL_POSE,
+ DEPSOP_TYPE_POST, function_bind(BKE_pose_eval_flush, _1, scene, ob, ob->pose), DEG_OPCODE_POSE_DONE);
+
+ /* bones */
+ LINKLIST_FOREACH (bPoseChannel *, pchan, &ob->pose->chanbase) {
+ /* node for bone eval */
+ add_operation_node(&ob->id, DEPSNODE_TYPE_BONE, pchan->name,
+ DEPSOP_TYPE_INIT, NULL, // XXX: BKE_pose_eval_bone_local
+ DEG_OPCODE_BONE_LOCAL);
+
+ add_operation_node(&ob->id, DEPSNODE_TYPE_BONE, pchan->name,
+ DEPSOP_TYPE_EXEC, function_bind(BKE_pose_eval_bone, _1, scene, ob, pchan), // XXX: BKE_pose_eval_bone_pose
+ DEG_OPCODE_BONE_POSE_PARENT);
+
+ add_operation_node(&ob->id, DEPSNODE_TYPE_BONE, pchan->name,
+ DEPSOP_TYPE_OUT, NULL, /* NOTE: dedicated noop for easier relationship construction */
+ DEG_OPCODE_BONE_READY);
+
+ add_operation_node(&ob->id, DEPSNODE_TYPE_BONE, pchan->name,
+ DEPSOP_TYPE_POST, function_bind(BKE_pose_bone_done, _1, pchan),
+ DEG_OPCODE_BONE_DONE);
+
+ /* constraints */
+ if (pchan->constraints.first != NULL) {
+ build_pose_constraints(ob, pchan);
+ }
+
+ /**
+ * IK Solvers...
+ *
+ * - These require separate processing steps at pose-level
+ * to be executed between chains of bones (i.e. once the
+ * base transforms of a bunch of bones is done)
+ *
+ * Unsolved Issues:
+ * - Care is needed to ensure that multi-headed trees work out the same as in ik-tree building
+ * - Animated chain-lengths are a problem...
+ */
+ LINKLIST_FOREACH (bConstraint *, con, &pchan->constraints) {
+ switch (con->type) {
+ case CONSTRAINT_TYPE_KINEMATIC:
+ build_ik_pose(scene, ob, pchan, con);
+ break;
+
+ case CONSTRAINT_TYPE_SPLINEIK:
+ build_splineik_pose(scene, ob, pchan, con);
+ break;
+
+ default:
+ break;
+ }
+ }
+ }
+}
+
+void DepsgraphNodeBuilder::build_proxy_rig(Object *ob)
+{
+ ID *obdata = (ID *)ob->data;
+ build_animdata(obdata);
+
+ BLI_assert(ob->pose != NULL);
+
+ /* speed optimization for animation lookups */
+ BKE_pose_channels_hash_make(ob->pose);
+ if (ob->pose->flag & POSE_CONSTRAINTS_NEED_UPDATE_FLAGS) {
+ BKE_pose_update_constraint_flags(ob->pose);
+ }
+
+ add_operation_node(&ob->id,
+ DEPSNODE_TYPE_EVAL_POSE,
+ DEPSOP_TYPE_INIT,
+ function_bind(BKE_pose_eval_proxy_copy, _1, ob),
+ DEG_OPCODE_POSE_INIT);
+
+ LINKLIST_FOREACH (bPoseChannel *, pchan, &ob->pose->chanbase) {
+ add_operation_node(&ob->id, DEPSNODE_TYPE_BONE, pchan->name,
+ DEPSOP_TYPE_INIT, NULL,
+ DEG_OPCODE_BONE_LOCAL);
+
+ add_operation_node(&ob->id, DEPSNODE_TYPE_BONE, pchan->name,
+ DEPSOP_TYPE_EXEC, NULL,
+ DEG_OPCODE_BONE_READY);
+
+ add_operation_node(&ob->id, DEPSNODE_TYPE_BONE, pchan->name,
+ DEPSOP_TYPE_POST, NULL,
+ DEG_OPCODE_BONE_DONE);
+ }
+
+ add_operation_node(&ob->id,
+ DEPSNODE_TYPE_EVAL_POSE,
+ DEPSOP_TYPE_POST,
+ NULL,
+ DEG_OPCODE_POSE_DONE);
+}
+
+} // namespace DEG
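
The moved rig-building code also converts its ListBase loops to LINKLIST_FOREACH. Assuming the macro is a thin typed wrapper over the usual first/next walk, an equivalent standalone version could look like this (the ListBase/Link definitions and the macro expansion below are simplified guesses, not the BLI headers):

    #include <cstdio>

    /* Minimal intrusive linked list in the spirit of Blender's ListBase. */
    struct Link { Link *next, *prev; };
    struct ListBase { void *first, *last; };

    /* Hedged guess at what the macro expands to: a typed first/next walk. */
    #define LINKLIST_FOREACH(type, var, list) \
        for (type var = (type)((list)->first); \
             var != nullptr; \
             var = (type)(((Link *)var)->next))

    struct GroupObject { GroupObject *next, *prev; const char *name; };

    int main()
    {
        GroupObject a = {nullptr, nullptr, "A"};
        GroupObject b = {nullptr, nullptr, "B"};
        a.next = &b;
        b.prev = &a;
        ListBase gobject = {&a, &b};
        LINKLIST_FOREACH (GroupObject *, go, &gobject) {
            std::printf("%s\n", go->name);
        }
        return 0;
    }
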
diff --git a/source/blender/depsgraph/intern/builder/deg_builder_nodes_scene.cc b/source/blender/depsgraph/intern/builder/deg_builder_nodes_scene.cc
new file mode 100644
index 0000000..bcd4bc5
--- /dev/null
+++ b/source/blender/depsgraph/intern/builder/deg_builder_nodes_scene.cc
@@ -0,0 +1,159 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2013 Blender Foundation.
+ * All rights reserved.
+ *
+ * Original Author: Joshua Leung
+ * Contributor(s): Based on original depsgraph.c code - Blender Foundation (2005-2013)
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/depsgraph/intern/builder/deg_builder_nodes_scene.cc
+ * \ingroup depsgraph
+ *
+ * Methods for constructing depsgraph's nodes
+ */
+
+#include "intern/builder/deg_builder_nodes.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "MEM_guardedalloc.h"
+
+extern "C" {
+#include "BLI_blenlib.h"
+#include "BLI_string.h"
+#include "BLI_utildefines.h"
+
+#include "DNA_node_types.h"
+#include "DNA_object_types.h"
+#include "DNA_scene_types.h"
+
+#include "BKE_main.h"
+#include "BKE_node.h"
+
+#include "DEG_depsgraph.h"
+#include "DEG_depsgraph_build.h"
+} /* extern "C" */
+
+#include "intern/builder/deg_builder.h"
+#include "intern/nodes/deg_node.h"
+#include "intern/nodes/deg_node_component.h"
+#include "intern/nodes/deg_node_operation.h"
+#include "intern/depsgraph_types.h"
+#include "intern/depsgraph_intern.h"
+#include "util/deg_util_foreach.h"
+
+namespace DEG {
+
+void DepsgraphNodeBuilder::build_scene(Main *bmain, Scene *scene)
+{
+ /* LIB_TAG_DOIT is used to indicate whether node for given ID was already
+ * created or not. This flag is being set in add_id_node(), so functions
+ * shouldn't bother with setting it, they only might query this flag when
+ * needed.
+ */
+ BKE_main_id_tag_all(bmain, LIB_TAG_DOIT, false);
+ /* XXX nested node trees are not included in tag-clearing above,
+ * so we need to do this manually.
+ */
+ FOREACH_NODETREE(bmain, nodetree, id) {
+ if (id != (ID *)nodetree)
+ nodetree->id.tag &= ~LIB_TAG_DOIT;
+ } FOREACH_NODETREE_END
+
+ /* scene ID block */
+ add_id_node(&scene->id);
+
+ /* timesource */
+ add_time_source(NULL);
+
+ /* build subgraph for set, and link this in... */
+ // XXX: depending on how this goes, that scene itself could probably store its
+ // own little partial depsgraph?
+ if (scene->set) {
+ build_scene(bmain, scene->set);
+ }
+
+ /* scene objects */
+ LINKLIST_FOREACH (Base *, base, &scene->base) {
+ Object *ob = base->object;
+
+ /* object itself */
+ build_object(scene, base, ob);
+
+ /* object that this is a proxy for */
+ // XXX: the way that proxies work needs to be completely reviewed!
+ if (ob->proxy) {
+ ob->proxy->proxy_from = ob;
+ build_object(scene, base, ob->proxy);
+ }
+
+ /* Object dupligroup. */
+ if (ob->dup_group) {
+ build_group(scene, base, ob->dup_group);
+ }
+ }
+
+ /* rigidbody */
+ if (scene->rigidbody_world) {
+ build_rigidbody(scene);
+ }
+
+ /* scene's animation and drivers */
+ if (scene->adt) {
+ build_animdata(&scene->id);
+ }
+
+ /* world */
+ if (scene->world) {
+ build_world(scene->world);
+ }
+
+ /* compo nodes */
+ if (scene->nodetree) {
+ build_compositor(scene);
+ }
+
+ /* sequencer */
+ // XXX...
+
+ /* grease pencil */
+ if (scene->gpd) {
+ build_gpencil(scene->gpd);
+ }
+
+ /* Cache file. */
+ LINKLIST_FOREACH (CacheFile *, cachefile, &bmain->cachefiles) {
+ build_cachefile(cachefile);
+ }
+
+ /* Masks. */
+ LINKLIST_FOREACH (Mask *, mask, &bmain->mask) {
+ build_mask(mask);
+ }
+
+ /* Movie clips. */
+ LINKLIST_FOREACH (MovieClip *, clip, &bmain->movieclip) {
+ build_movieclip(clip);
+ }
+}
+
+} // namespace DEG
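
build_scene() clears LIB_TAG_DOIT on every ID up front and then treats it as an "already built" marker, so data-blocks reachable through several paths (shared objects, node trees, proxies) are only built once. The same visited-flag pattern in a tiny standalone form, with a stand-in tag bit and ID struct:

    #include <cstdio>

    enum { TAG_DOIT = 1 << 0 };  /* stand-in for LIB_TAG_DOIT */

    struct ID {
        const char *name;
        int tag;
    };

    /* Build an ID node only once, using the tag bit as the visited marker. */
    static void build_id(ID *id)
    {
        if (id->tag & TAG_DOIT) {
            return;  /* already built, e.g. an object shared by two groups */
        }
        id->tag |= TAG_DOIT;
        std::printf("building %s\n", id->name);
    }

    int main()
    {
        ID ob = {"OBCube", 0};
        ob.tag &= ~TAG_DOIT;  /* clear tags up front, as build_scene() does */
        build_id(&ob);
        build_id(&ob);        /* second call is a no-op */
        return 0;
    }
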
diff --git a/source/blender/depsgraph/intern/builder/deg_builder_relations.cc b/source/blender/depsgraph/intern/builder/deg_builder_relations.cc
index 2148a35..b5272d3 100644
--- a/source/blender/depsgraph/intern/builder/deg_builder_relations.cc
+++ b/source/blender/depsgraph/intern/builder/deg_builder_relations.cc
@@ -34,13 +34,12 @@
#include <stdio.h>
#include <stdlib.h>
-#include <string.h>
+#include <cstring> /* required for STREQ later on. */
#include "MEM_guardedalloc.h"
extern "C" {
#include "BLI_blenlib.h"
-#include "BLI_string.h"
#include "BLI_utildefines.h"
#include "DNA_action_types.h"
@@ -56,8 +55,10 @@ extern "C" {
#include "DNA_key_types.h"
#include "DNA_lamp_types.h"
#include "DNA_material_types.h"
+#include "DNA_mask_types.h"
#include "DNA_mesh_types.h"
#include "DNA_meta_types.h"
+#include "DNA_movieclip_types.h"
#include "DNA_node_types.h"
#include "DNA_particle_types.h"
#include "DNA_object_types.h"
@@ -115,6 +116,32 @@ namespace DEG {
/* ***************** */
/* Relations Builder */
+/* TODO(sergey): This is somewhat weak, but we want neither false-positive
+ * time dependencies nor special exceptions in the depsgraph evaluation.
+ */
+static bool python_driver_depends_on_time(ChannelDriver *driver)
+{
+ if (driver->expression[0] == '\0') {
+ /* Empty expression depends on nothing. */
+ return false;
+ }
+ if (strchr(driver->expression, '(') != NULL) {
+ /* Function calls are considered dependent on time. */
+ return true;
+ }
+ if (strstr(driver->expression, "time") != NULL) {
+ /* Variable `time` depends on time. */
+ /* TODO(sergey): This is a bit weak, but not sure about a better way of
+ * handling this.
+ */
+ return true;
+ }
+ /* Possible indirect time relations should be handled via variable
+ * targets.
+ */
+ return false;
+}
+
/* **** General purpose functions **** */
RNAPathKey::RNAPathKey(ID *id, const char *path) :
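
python_driver_depends_on_time() above is a purely textual heuristic: an empty expression depends on nothing, any '(' (a function call) or the substring "time" is assumed to make the driver time-dependent, and everything else gets no time relation. A standalone copy of that check over a bare expression string, with a few probe inputs (a sketch only, the real driver struct carries more state):

    #include <cstdio>
    #include <cstring>

    /* Same textual heuristic as above, applied to a bare expression string. */
    static bool expression_depends_on_time(const char *expression)
    {
        if (expression[0] == '\0') {
            return false;  /* empty expression depends on nothing */
        }
        if (std::strchr(expression, '(') != nullptr) {
            return true;   /* any function call is assumed time-dependent */
        }
        if (std::strstr(expression, "time") != nullptr) {
            return true;   /* references the `time` variable */
        }
        return false;      /* plain variable arithmetic: no time relation */
    }

    int main()
    {
        const char *samples[] = {"", "var * 2", "sin(frame)", "time + 1"};
        for (const char *expr : samples) {
            std::printf("%-12s -> %d\n", expr, expression_depends_on_time(expr));
        }
        return 0;
    }
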
@@ -185,10 +212,12 @@ OperationDepsNode *DepsgraphRelationBuilder::find_node(
return NULL;
}
- OperationDepsNode *op_node = comp_node->find_operation(key.opcode, key.name);
+ OperationDepsNode *op_node = comp_node->find_operation(key.opcode,
+ key.name,
+ key.name_tag);
if (!op_node) {
fprintf(stderr, "find_node_operation: Failed for (%s, '%s')\n",
- DEG_OPNAMES[key.opcode], key.name.c_str());
+ DEG_OPNAMES[key.opcode], key.name);
}
return op_node;
}
@@ -210,7 +239,7 @@ OperationDepsNode *DepsgraphRelationBuilder::has_node(
if (!comp_node) {
return NULL;
}
- return comp_node->has_operation(key.opcode, key.name);
+ return comp_node->has_operation(key.opcode, key.name, key.name_tag);
}
void DepsgraphRelationBuilder::add_time_relation(TimeSourceDepsNode *timesrc,
@@ -310,88 +339,6 @@ void DepsgraphRelationBuilder::add_forcefield_relations(const OperationKey &key,
/* **** Functions to build relations between entities **** */
-void DepsgraphRelationBuilder::build_scene(Main *bmain, Scene *scene)
-{
- /* LIB_TAG_DOIT is used to indicate whether node for given ID was already
- * created or not.
- */
- BKE_main_id_tag_all(bmain, LIB_TAG_DOIT, false);
- /* XXX nested node trees are not included in tag-clearing above,
- * so we need to do this manually.
- */
- FOREACH_NODETREE(bmain, nodetree, id) {
- if (id != (ID *)nodetree)
- nodetree->id.tag &= ~LIB_TAG_DOIT;
- } FOREACH_NODETREE_END
-
- if (scene->set) {
- // TODO: link set to scene, especially our timesource...
- }
-
- /* scene objects */
- for (Base *base = (Base *)scene->base.first; base; base = base->next) {
- Object *ob = base->object;
-
- /* object itself */
- build_object(bmain, scene, ob);
-
- /* object that this is a proxy for */
- if (ob->proxy) {
- ob->proxy->proxy_from = ob;
- build_object(bmain, scene, ob->proxy);
- /* TODO(sergey): This is an inverted relation, matches old depsgraph
- * behavior and need to be investigated if it still need to be inverted.
- */
- ComponentKey ob_pose_key(&ob->id, DEPSNODE_TYPE_EVAL_POSE);
- ComponentKey proxy_pose_key(&ob->proxy->id, DEPSNODE_TYPE_EVAL_POSE);
- add_relation(ob_pose_key, proxy_pose_key, DEPSREL_TYPE_TRANSFORM, "Proxy");
- }
-
- /* Object dupligroup. */
- if (ob->dup_group) {
- build_group(bmain, scene, ob, ob->dup_group);
- }
- }
-
- /* rigidbody */
- if (scene->rigidbody_world) {
- build_rigidbody(scene);
- }
-
- /* scene's animation and drivers */
- if (scene->adt) {
- build_animdata(&scene->id);
- }
-
- /* world */
- if (scene->world) {
- build_world(scene->world);
- }
-
- /* compo nodes */
- if (scene->nodetree) {
- build_compositor(scene);
- }
-
- /* grease pencil */
- if (scene->gpd) {
- build_gpencil(&scene->id, scene->gpd);
- }
-
- for (Depsgraph::OperationNodes::const_iterator it_op = m_graph->operations.begin();
- it_op != m_graph->operations.end();
- ++it_op)
- {
- OperationDepsNode *node = *it_op;
- IDDepsNode *id_node = node->owner->owner;
- ID *id = id_node->id;
- if (GS(id->name) == ID_OB) {
- Object *object = (Object *)id;
- object->customdata_mask |= node->customdata_mask;
- }
- }
-}
-
void DepsgraphRelationBuilder::build_group(Main *bmain,
Scene *scene,
Object *object,
@@ -402,10 +349,7 @@ void DepsgraphRelationBuilder::build_group(Main *bmain,
OperationKey object_local_transform_key(&object->id,
DEPSNODE_TYPE_TRANSFORM,
DEG_OPCODE_TRANSFORM_LOCAL);
- for (GroupObject *go = (GroupObject *)group->gobject.first;
- go != NULL;
- go = go->next)
- {
+ LINKLIST_FOREACH (GroupObject *, go, &group->gobject) {
if (!group_done) {
build_object(bmain, scene, go->ob);
}
@@ -423,6 +367,7 @@ void DepsgraphRelationBuilder::build_object(Main *bmain, Scene *scene, Object *o
if (ob->id.tag & LIB_TAG_DOIT) {
return;
}
+ ob->id.tag |= LIB_TAG_DOIT;
/* Object Transforms */
eDepsOperation_Code base_op = (ob->parent) ? DEG_OPCODE_TRANSFORM_PARENT : DEG_OPCODE_TRANSFORM_LOCAL;
@@ -461,12 +406,23 @@ void DepsgraphRelationBuilder::build_object(Main *bmain, Scene *scene, Object *o
add_relation(ob_ubereval_key, final_transform_key, DEPSREL_TYPE_COMPONENT_ORDER, "Temp Ubereval");
}
else {
- /* operation order */
- add_relation(base_op_key, final_transform_key, DEPSREL_TYPE_COMPONENT_ORDER, "Object Transform");
-
- // XXX
- add_relation(base_op_key, ob_ubereval_key, DEPSREL_TYPE_COMPONENT_ORDER, "Temp Ubereval");
- add_relation(ob_ubereval_key, final_transform_key, DEPSREL_TYPE_COMPONENT_ORDER, "Temp Ubereval");
+ /* NOTE: Keep an eye here, we skip some relations to "streamline"
+ * dependencies and avoid transitive relations, which cause overhead.
+ * But once we get rid of the uber eval node this will need reconsideration.
+ */
+ if (ob->rigidbody_object == NULL) {
+ /* Rigid body will hook up another node in between, so skip
+ * relation here to avoid transitive relation.
+ */
+ add_relation(base_op_key,
+ ob_ubereval_key,
+ DEPSREL_TYPE_COMPONENT_ORDER,
+ "Temp Ubereval");
+ }
+ add_relation(ob_ubereval_key,
+ final_transform_key,
+ DEPSREL_TYPE_COMPONENT_ORDER,
+ "Temp Ubereval");
}
@@ -474,7 +430,7 @@ void DepsgraphRelationBuilder::build_object(Main *bmain, Scene *scene, Object *o
build_animdata(&ob->id);
// XXX: This should be hooked up by the build_animdata code
- if (ob->adt && (ob->adt->action || ob->adt->nla_tracks.first)) {
+ if (needs_animdata_node(&ob->id)) {
ComponentKey adt_key(&ob->id, DEPSNODE_TYPE_ANIMATION);
add_relation(adt_key, local_transform_key, DEPSREL_TYPE_OPERATION, "Object Animation");
}
@@ -572,8 +528,20 @@ void DepsgraphRelationBuilder::build_object_parent(Object *ob)
case PARBONE: /* Bone Parent */
{
- ComponentKey parent_key(&ob->parent->id, DEPSNODE_TYPE_BONE, ob->parsubstr);
- add_relation(parent_key, ob_key, DEPSREL_TYPE_TRANSFORM, "Bone Parent");
+ ComponentKey parent_bone_key(&ob->parent->id,
+ DEPSNODE_TYPE_BONE,
+ ob->parsubstr);
+ OperationKey parent_transform_key(&ob->parent->id,
+ DEPSNODE_TYPE_TRANSFORM,
+ DEG_OPCODE_TRANSFORM_FINAL);
+ add_relation(parent_bone_key,
+ ob_key,
+ DEPSREL_TYPE_TRANSFORM,
+ "Bone Parent");
+ add_relation(parent_transform_key,
+ ob_key,
+ DEPSREL_TYPE_TRANSFORM,
+ "Armature Parent");
break;
}
@@ -681,9 +649,10 @@ void DepsgraphRelationBuilder::build_constraints(Scene *scene, ID *id, eDepsNode
ListBase targets = {NULL, NULL};
cti->get_constraint_targets(con, &targets);
- for (bConstraintTarget *ct = (bConstraintTarget *)targets.first; ct; ct = ct->next) {
- if (!ct->tar)
+ LINKLIST_FOREACH (bConstraintTarget *, ct, &targets) {
+ if (ct->tar == NULL) {
continue;
+ }
if (ELEM(con->type, CONSTRAINT_TYPE_KINEMATIC, CONSTRAINT_TYPE_SPLINEIK)) {
/* ignore IK constraints - these are handled separately (on pose level) */
@@ -810,12 +779,64 @@ void DepsgraphRelationBuilder::build_animdata(ID *id)
}
/* drivers */
- for (FCurve *fcu = (FCurve *)adt->drivers.first; fcu; fcu = fcu->next) {
- OperationKey driver_key(id, DEPSNODE_TYPE_PARAMETERS, DEG_OPCODE_DRIVER, deg_fcurve_id_name(fcu));
+ LINKLIST_FOREACH (FCurve *, fcu, &adt->drivers) {
+ OperationKey driver_key(id,
+ DEPSNODE_TYPE_PARAMETERS,
+ DEG_OPCODE_DRIVER,
+ fcu->rna_path,
+ fcu->array_index);
/* create the driver's relations to targets */
build_driver(id, fcu);
+ /* Special case for array drivers: we cannot multithread them because
+ * of the way they work internally: the animation system will write the
+ * whole array back to RNA even when changing an individual array value.
+ *
+ * Some tricky things here:
+ * - array_index is -1 for single channel drivers, meaning we only have
+ * to do some magic when array_index is not -1.
+ * - We add a relation from the next array index to the previous one, so
+ * we don't have to deal with array index 0.
+ *
+ * TODO(sergey): Avoid linear lookup somehow.
+ */
+ if (fcu->array_index > 0) {
+ FCurve *fcu_prev = NULL;
+ LINKLIST_FOREACH (FCurve *, fcu_candidate, &adt->drivers) {
+ /* Writing to a different RNA path is fine, no conflict there. */
+ if (!STREQ(fcu_candidate->rna_path, fcu->rna_path)) {
+ continue;
+ }
+ /* We only add a relation from a previous fcurve to the current one. */
+ if (fcu_candidate->array_index >= fcu->array_index) {
+ continue;
+ }
+ /* Choose fcurve with highest possible array index. */
+ if (fcu_prev == NULL ||
+ fcu_candidate->array_index > fcu_prev->array_index)
+ {
+ fcu_prev = fcu_candidate;
+ }
+ }
+ if (fcu_prev != NULL) {
+ OperationKey prev_driver_key(id,
+ DEPSNODE_TYPE_PARAMETERS,
+ DEG_OPCODE_DRIVER,
+ fcu_prev->rna_path,
+ fcu_prev->array_index);
+ OperationKey driver_key(id,
+ DEPSNODE_TYPE_PARAMETERS,
+ DEG_OPCODE_DRIVER,
+ fcu->rna_path,
+ fcu->array_index);
+ add_relation(prev_driver_key,
+ driver_key,
+ DEPSREL_TYPE_OPERATION,
+ "[Driver Order]");
+ }
+ }
+
/* prevent driver from occurring before own animation... */
if (adt->action || adt->nla_tracks.first) {
add_relation(adt_key, driver_key, DEPSREL_TYPE_OPERATION,
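To make the array-driver ordering above concrete, here is a small self-contained sketch of the fcu_prev search (toy types and names, not Blender's actual FCurve API): for a driver with array_index N it picks the driver on the same RNA path with the highest array_index strictly below N, and the "[Driver Order]" relation then forces that one to evaluate first.

#include <cstring>
#include <vector>

struct ToyDriver { const char *rna_path; int array_index; };

/* Highest array_index strictly below fcu.array_index, on the same RNA path;
 * returns NULL when fcu is the first (or only) driver on that path. */
static const ToyDriver *find_prev_driver(const std::vector<ToyDriver> &drivers,
                                         const ToyDriver &fcu)
{
    const ToyDriver *prev = NULL;
    for (const ToyDriver &cand : drivers) {
        if (strcmp(cand.rna_path, fcu.rna_path) != 0) {
            continue;  /* Different property, no conflict. */
        }
        if (cand.array_index >= fcu.array_index) {
            continue;  /* Not a previous index. */
        }
        if (prev == NULL || cand.array_index > prev->array_index) {
            prev = &cand;
        }
    }
    return prev;  /* A "[Driver Order]" relation prev -> fcu then serializes evaluation. */
}

For three drivers on a hypothetical "rotation_euler" path this yields 0 -> 1 and 1 -> 2, so the whole-array RNA write-back never runs concurrently.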
@@ -827,7 +848,11 @@ void DepsgraphRelationBuilder::build_animdata(ID *id)
void DepsgraphRelationBuilder::build_driver(ID *id, FCurve *fcu)
{
ChannelDriver *driver = fcu->driver;
- OperationKey driver_key(id, DEPSNODE_TYPE_PARAMETERS, DEG_OPCODE_DRIVER, deg_fcurve_id_name(fcu));
+ OperationKey driver_key(id,
+ DEPSNODE_TYPE_PARAMETERS,
+ DEG_OPCODE_DRIVER,
+ fcu->rna_path,
+ fcu->array_index);
bPoseChannel *pchan = NULL;
/* create dependency between driver and data affected by it */
@@ -942,7 +967,7 @@ void DepsgraphRelationBuilder::build_driver(ID *id, FCurve *fcu)
// XXX: the data itself could also set this, if it were to be truly initialised later?
/* loop over variables to get the target relationships */
- for (DriverVar *dvar = (DriverVar *)driver->variables.first; dvar; dvar = dvar->next) {
+ LINKLIST_FOREACH (DriverVar *, dvar, &driver->variables) {
/* only used targets */
DRIVER_TARGETS_USED_LOOPER(dvar)
{
@@ -1016,7 +1041,9 @@ void DepsgraphRelationBuilder::build_driver(ID *id, FCurve *fcu)
* so for now we'll be quite conservative here about optimization and consider
* all python drivers to be depending on time.
*/
- if (driver->type == DRIVER_TYPE_PYTHON) {
+ if ((driver->type == DRIVER_TYPE_PYTHON) &&
+ python_driver_depends_on_time(driver))
+ {
TimeSourceKey time_src_key;
add_relation(time_src_key, driver_key, DEPSREL_TYPE_TIME, "[TimeSrc -> Driver]");
}
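The new python_driver_depends_on_time() gate is not shown in this hunk; presumably it inspects the driver expression for references to the current frame, so that purely static Python expressions no longer pull in the time source. A toy approximation under that assumption (hypothetical helper, not the real implementation):

#include <cstring>

/* Hypothetical stand-in for the real check: treat any reference to "frame"
 * in the expression as a time dependency. The actual implementation lives in
 * the depsgraph sources and may use different heuristics. */
static bool toy_python_driver_depends_on_time(const char *expression)
{
    if (expression == NULL) {
        return false;
    }
    return strstr(expression, "frame") != NULL;
}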
@@ -1055,15 +1082,18 @@ void DepsgraphRelationBuilder::build_rigidbody(Scene *scene)
/* time dependency */
TimeSourceKey time_src_key;
- add_relation(time_src_key, init_key, DEPSREL_TYPE_TIME, "TimeSrc -> Rigidbody Reset/Rebuild (Optional)");
- add_relation(time_src_key, sim_key, DEPSREL_TYPE_TIME, "TimeSrc -> Rigidbody Sim Step");
+ add_relation(time_src_key,
+ init_key,
+ DEPSREL_TYPE_TIME,
+ "TimeSrc -> Rigidbody Reset/Rebuild (Optional)");
/* objects - simulation participants */
if (rbw->group) {
- for (GroupObject *go = (GroupObject *)rbw->group->gobject.first; go; go = go->next) {
+ LINKLIST_FOREACH (GroupObject *, go, &rbw->group->gobject) {
Object *ob = go->ob;
- if (!ob || ob->type != OB_MESH)
+ if (ob == NULL || ob->type != OB_MESH) {
continue;
+ }
/* hook up evaluation order...
* 1) flushing rigidbody results follows base transforms being applied
@@ -1078,7 +1108,6 @@ void DepsgraphRelationBuilder::build_rigidbody(Scene *scene)
eDepsOperation_Code trans_opcode = ob->parent ? DEG_OPCODE_TRANSFORM_PARENT : DEG_OPCODE_TRANSFORM_LOCAL;
OperationKey trans_op(&ob->id, DEPSNODE_TYPE_TRANSFORM, trans_opcode);
- add_relation(trans_op, rbo_key, DEPSREL_TYPE_OPERATION, "Base Ob Transform -> RBO Sync");
add_relation(sim_key, rbo_key, DEPSREL_TYPE_COMPONENT_ORDER, "Rigidbody Sim Eval -> RBO Sync");
/* if constraints exist, those depend on the result of the rigidbody sim
@@ -1090,31 +1119,44 @@ void DepsgraphRelationBuilder::build_rigidbody(Scene *scene)
* to control whether rigidbody eval gets interleaved into the constraint stack
*/
if (ob->constraints.first) {
- OperationKey constraint_key(&ob->id, DEPSNODE_TYPE_TRANSFORM, DEG_OPCODE_TRANSFORM_CONSTRAINTS);
- add_relation(rbo_key, constraint_key, DEPSREL_TYPE_COMPONENT_ORDER, "RBO Sync -> Ob Constraints");
+ OperationKey constraint_key(&ob->id,
+ DEPSNODE_TYPE_TRANSFORM,
+ DEG_OPCODE_TRANSFORM_CONSTRAINTS);
+ add_relation(rbo_key,
+ constraint_key,
+ DEPSREL_TYPE_COMPONENT_ORDER,
+ "RBO Sync -> Ob Constraints");
}
else {
- /* final object transform depends on rigidbody */
- OperationKey done_key(&ob->id, DEPSNODE_TYPE_TRANSFORM, DEG_OPCODE_TRANSFORM_FINAL);
- add_relation(rbo_key, done_key, DEPSREL_TYPE_COMPONENT_ORDER, "RBO Sync -> Done");
-
- // XXX: ubereval will be removed eventually, but we still need it in the meantime
- OperationKey uber_key(&ob->id, DEPSNODE_TYPE_TRANSFORM, DEG_OPCODE_OBJECT_UBEREVAL);
- add_relation(rbo_key, uber_key, DEPSREL_TYPE_COMPONENT_ORDER, "RBO Sync -> Uber (Temp)");
+ /* Final object transform depends on rigidbody.
+ *
+ * NOTE: Currently we treat the ubereval node as the final transform here.
+ * Once it is gone we'll need to reconsider this relation.
+ */
+ OperationKey uber_key(&ob->id,
+ DEPSNODE_TYPE_TRANSFORM,
+ DEG_OPCODE_OBJECT_UBEREVAL);
+ add_relation(rbo_key,
+ uber_key,
+ DEPSREL_TYPE_COMPONENT_ORDER,
+ "RBO Sync -> Uber (Temp)");
}
-
- /* needed to get correct base values */
- add_relation(trans_op, sim_key, DEPSREL_TYPE_OPERATION, "Base Ob Transform -> Rigidbody Sim Eval");
+ /* Needed to get correct base values. */
+ add_relation(trans_op,
+ sim_key,
+ DEPSREL_TYPE_OPERATION,
+ "Base Ob Transform -> Rigidbody Sim Eval");
}
}
/* constraints */
if (rbw->constraints) {
- for (GroupObject *go = (GroupObject *)rbw->constraints->gobject.first; go; go = go->next) {
+ LINKLIST_FOREACH (GroupObject *, go, &rbw->constraints->gobject) {
Object *ob = go->ob;
- if (!ob || !ob->rigidbody_constraint)
+ if (ob == NULL || !ob->rigidbody_constraint) {
continue;
+ }
RigidBodyCon *rbc = ob->rigidbody_constraint;
@@ -1143,7 +1185,7 @@ void DepsgraphRelationBuilder::build_particles(Scene *scene, Object *ob)
DEG_OPCODE_GEOMETRY_UBEREVAL);
/* particle systems */
- for (ParticleSystem *psys = (ParticleSystem *)ob->particlesystem.first; psys; psys = psys->next) {
+ LINKLIST_FOREACH (ParticleSystem *, psys, &ob->particlesystem) {
ParticleSettings *part = psys->part;
/* particle settings */
@@ -1174,9 +1216,7 @@ void DepsgraphRelationBuilder::build_particles(Scene *scene, Object *ob)
#if 0
if (ELEM(part->phystype, PART_PHYS_KEYED, PART_PHYS_BOIDS)) {
- ParticleTarget *pt;
-
- for (pt = psys->targets.first; pt; pt = pt->next) {
+ LINKLIST_FOREACH (ParticleTarget *, pt, &psys->targets) {
if (pt->ob && BLI_findlink(&pt->ob->particlesystem, pt->psys - 1)) {
node2 = dag_get_node(dag, pt->ob);
dag_add_relation(dag, node2, node, DAG_RL_DATA_DATA | DAG_RL_OB_DATA, "Particle Targets");
@@ -1195,7 +1235,7 @@ void DepsgraphRelationBuilder::build_particles(Scene *scene, Object *ob)
}
if (part->ren_as == PART_DRAW_GR && part->dup_group) {
- for (go = part->dup_group->gobject.first; go; go = go->next) {
+ LINKLIST_FOREACH (GroupObject *, go, &part->dup_group->gobject) {
node2 = dag_get_node(dag, go->ob);
dag_add_relation(dag, node2, node, DAG_RL_OB_OB, "Particle Group Visualization");
}
@@ -1206,17 +1246,17 @@ void DepsgraphRelationBuilder::build_particles(Scene *scene, Object *ob)
if (part->type != PART_HAIR) {
add_collision_relations(psys_key, scene, ob, part->collision_group, ob->lay, true, "Particle Collision");
}
+ else if ((psys->flag & PSYS_HAIR_DYNAMICS) && psys->clmd && psys->clmd->coll_parms) {
+ add_collision_relations(psys_key, scene, ob, psys->clmd->coll_parms->group, ob->lay | scene->lay, true, "Hair Collision");
+ }
/* effectors */
add_forcefield_relations(psys_key, scene, ob, psys, part->effector_weights, part->type == PART_HAIR, "Particle Field");
/* boids */
if (part->boids) {
- BoidRule *rule = NULL;
- BoidState *state = NULL;
-
- for (state = (BoidState *)part->boids->states.first; state; state = state->next) {
- for (rule = (BoidRule *)state->rules.first; rule; rule = rule->next) {
+ LINKLIST_FOREACH (BoidState *, state, &part->boids->states) {
+ LINKLIST_FOREACH (BoidRule *, rule, &state->rules) {
Object *ruleob = NULL;
if (rule->type == eBoidRuleType_Avoid)
ruleob = ((BoidRuleGoalAvoid *)rule)->ob;
@@ -1256,391 +1296,6 @@ void DepsgraphRelationBuilder::build_particles(Scene *scene, Object *ob)
// TODO...
}
-/* IK Solver Eval Steps */
-void DepsgraphRelationBuilder::build_ik_pose(Object *ob,
- bPoseChannel *pchan,
- bConstraint *con,
- RootPChanMap *root_map)
-{
- bKinematicConstraint *data = (bKinematicConstraint *)con->data;
-
- /* attach owner to IK Solver too
- * - assume that owner is always part of chain
- * - see notes on direction of rel below...
- */
- bPoseChannel *rootchan = BKE_armature_ik_solver_find_root(pchan, data);
- OperationKey solver_key(&ob->id, DEPSNODE_TYPE_EVAL_POSE, rootchan->name, DEG_OPCODE_POSE_IK_SOLVER);
-
- /* IK target */
- // XXX: this should get handled as part of the constraint code
- if (data->tar != NULL) {
- /* TODO(sergey): For until we'll store partial matricies in the depsgraph,
- * we create dependency between target object and pose eval component.
- *
- * This way we ensuring the whole subtree is updated from scratch without
- * need of intermediate matricies. This is an overkill, but good enough for
- * testing IK solver.
- */
- // FIXME: geometry targets...
- ComponentKey pose_key(&ob->id, DEPSNODE_TYPE_EVAL_POSE);
- if ((data->tar->type == OB_ARMATURE) && (data->subtarget[0])) {
- /* TODO(sergey): This is only for until granular update stores intermediate result. */
- if (data->tar != ob) {
- /* different armature - can just read the results */
- ComponentKey target_key(&data->tar->id, DEPSNODE_TYPE_BONE, data->subtarget);
- add_relation(target_key, pose_key, DEPSREL_TYPE_TRANSFORM, con->name);
- }
- else {
- /* same armature - we'll use the ready state only, just in case this bone is in the chain we're solving */
- OperationKey target_key(&data->tar->id, DEPSNODE_TYPE_BONE, data->subtarget, DEG_OPCODE_BONE_DONE);
- add_relation(target_key, solver_key, DEPSREL_TYPE_TRANSFORM, con->name);
- }
- }
- else if (ELEM(data->tar->type, OB_MESH, OB_LATTICE) && (data->subtarget[0])) {
- /* vertex group target */
- /* NOTE: for now, we don't need to represent vertex groups separately... */
- ComponentKey target_key(&data->tar->id, DEPSNODE_TYPE_GEOMETRY);
- add_relation(target_key, solver_key, DEPSREL_TYPE_GEOMETRY_EVAL, con->name);
-
- if (data->tar->type == OB_MESH) {
- OperationDepsNode *node2 = find_operation_node(target_key);
- if (node2 != NULL) {
- node2->customdata_mask |= CD_MASK_MDEFORMVERT;
- }
- }
- }
- else {
- /* Standard Object Target */
- ComponentKey target_key(&data->tar->id, DEPSNODE_TYPE_TRANSFORM);
- add_relation(target_key, pose_key, DEPSREL_TYPE_TRANSFORM, con->name);
- }
-
- if ((data->tar == ob) && (data->subtarget[0])) {
- /* Prevent target's constraints from linking to anything from same
- * chain that it controls.
- */
- root_map->add_bone(data->subtarget, rootchan->name);
- }
- }
-
- /* Pole Target */
- // XXX: this should get handled as part of the constraint code
- if (data->poletar != NULL) {
- if ((data->poletar->type == OB_ARMATURE) && (data->polesubtarget[0])) {
- // XXX: same armature issues - ready vs done?
- ComponentKey target_key(&data->poletar->id, DEPSNODE_TYPE_BONE, data->subtarget);
- add_relation(target_key, solver_key, DEPSREL_TYPE_TRANSFORM, con->name);
- }
- else if (ELEM(data->poletar->type, OB_MESH, OB_LATTICE) && (data->subtarget[0])) {
- /* vertex group target */
- /* NOTE: for now, we don't need to represent vertex groups separately... */
- ComponentKey target_key(&data->poletar->id, DEPSNODE_TYPE_GEOMETRY);
- add_relation(target_key, solver_key, DEPSREL_TYPE_GEOMETRY_EVAL, con->name);
-
- if (data->poletar->type == OB_MESH) {
- OperationDepsNode *node2 = find_operation_node(target_key);
- if (node2 != NULL) {
- node2->customdata_mask |= CD_MASK_MDEFORMVERT;
- }
- }
- }
- else {
- ComponentKey target_key(&data->poletar->id, DEPSNODE_TYPE_TRANSFORM);
- add_relation(target_key, solver_key, DEPSREL_TYPE_TRANSFORM, con->name);
- }
- }
-
- DEG_DEBUG_PRINTF("\nStarting IK Build: pchan = %s, target = (%s, %s), segcount = %d\n",
- pchan->name, data->tar->id.name, data->subtarget, data->rootbone);
-
- bPoseChannel *parchan = pchan;
- /* exclude tip from chain? */
- if (!(data->flag & CONSTRAINT_IK_TIP)) {
- OperationKey tip_transforms_key(&ob->id, DEPSNODE_TYPE_BONE,
- parchan->name, DEG_OPCODE_BONE_LOCAL);
- add_relation(solver_key, tip_transforms_key,
- DEPSREL_TYPE_TRANSFORM, "IK Solver Result");
- parchan = pchan->parent;
- }
-
- root_map->add_bone(parchan->name, rootchan->name);
-
- OperationKey parchan_transforms_key(&ob->id, DEPSNODE_TYPE_BONE,
- parchan->name, DEG_OPCODE_BONE_READY);
- add_relation(parchan_transforms_key, solver_key,
- DEPSREL_TYPE_TRANSFORM, "IK Solver Owner");
-
- /* Walk to the chain's root */
- //size_t segcount = 0;
- int segcount = 0;
-
- while (parchan) {
- /* Make IK-solver dependent on this bone's result,
- * since it can only run after the standard results
- * of the bone are know. Validate links step on the
- * bone will ensure that users of this bone only
- * grab the result with IK solver results...
- */
- if (parchan != pchan) {
- OperationKey parent_key(&ob->id, DEPSNODE_TYPE_BONE, parchan->name, DEG_OPCODE_BONE_READY);
- add_relation(parent_key, solver_key, DEPSREL_TYPE_TRANSFORM, "IK Chain Parent");
-
- OperationKey done_key(&ob->id, DEPSNODE_TYPE_BONE, parchan->name, DEG_OPCODE_BONE_DONE);
- add_relation(solver_key, done_key, DEPSREL_TYPE_TRANSFORM, "IK Chain Result");
- }
- else {
- OperationKey final_transforms_key(&ob->id, DEPSNODE_TYPE_BONE, parchan->name, DEG_OPCODE_BONE_DONE);
- add_relation(solver_key, final_transforms_key, DEPSREL_TYPE_TRANSFORM, "IK Solver Result");
- }
- parchan->flag |= POSE_DONE;
-
-
- root_map->add_bone(parchan->name, rootchan->name);
-
- /* continue up chain, until we reach target number of items... */
- DEG_DEBUG_PRINTF(" %d = %s\n", segcount, parchan->name);
- segcount++;
- if ((segcount == data->rootbone) || (segcount > 255)) break; /* 255 is weak */
-
- parchan = parchan->parent;
- }
-
- OperationKey flush_key(&ob->id, DEPSNODE_TYPE_EVAL_POSE, DEG_OPCODE_POSE_DONE);
- add_relation(solver_key, flush_key, DEPSREL_TYPE_OPERATION, "PoseEval Result-Bone Link");
-}
-
-/* Spline IK Eval Steps */
-void DepsgraphRelationBuilder::build_splineik_pose(Object *ob,
- bPoseChannel *pchan,
- bConstraint *con,
- RootPChanMap *root_map)
-{
- bSplineIKConstraint *data = (bSplineIKConstraint *)con->data;
- bPoseChannel *rootchan = BKE_armature_splineik_solver_find_root(pchan, data);
- OperationKey transforms_key(&ob->id, DEPSNODE_TYPE_BONE, pchan->name, DEG_OPCODE_BONE_READY);
- OperationKey solver_key(&ob->id, DEPSNODE_TYPE_EVAL_POSE, rootchan->name, DEG_OPCODE_POSE_SPLINE_IK_SOLVER);
-
- /* attach owner to IK Solver too
- * - assume that owner is always part of chain
- * - see notes on direction of rel below...
- */
- add_relation(transforms_key, solver_key, DEPSREL_TYPE_TRANSFORM, "Spline IK Solver Owner");
-
- /* attach path dependency to solver */
- if (data->tar) {
- /* TODO(sergey): For until we'll store partial matricies in the depsgraph,
- * we create dependency between target object and pose eval component.
- * See IK pose for a bit more information.
- */
- // TODO: the bigggest point here is that we need the curve PATH and not just the general geometry...
- ComponentKey target_key(&data->tar->id, DEPSNODE_TYPE_GEOMETRY);
- ComponentKey pose_key(&ob->id, DEPSNODE_TYPE_EVAL_POSE);
- add_relation(target_key, pose_key, DEPSREL_TYPE_TRANSFORM, "[Curve.Path -> Spline IK] DepsRel");
- }
-
- pchan->flag |= POSE_DONE;
- OperationKey final_transforms_key(&ob->id, DEPSNODE_TYPE_BONE, pchan->name, DEG_OPCODE_BONE_DONE);
- add_relation(solver_key, final_transforms_key, DEPSREL_TYPE_TRANSFORM, "Spline IK Result");
-
- root_map->add_bone(pchan->name, rootchan->name);
-
- /* Walk to the chain's root */
- //size_t segcount = 0;
- int segcount = 0;
-
- for (bPoseChannel *parchan = pchan->parent; parchan; parchan = parchan->parent) {
- /* Make Spline IK solver dependent on this bone's result,
- * since it can only run after the standard results
- * of the bone are know. Validate links step on the
- * bone will ensure that users of this bone only
- * grab the result with IK solver results...
- */
- if (parchan != pchan) {
- OperationKey parent_key(&ob->id, DEPSNODE_TYPE_BONE, parchan->name, DEG_OPCODE_BONE_READY);
- add_relation(parent_key, solver_key, DEPSREL_TYPE_TRANSFORM, "Spline IK Solver Update");
-
- OperationKey done_key(&ob->id, DEPSNODE_TYPE_BONE, parchan->name, DEG_OPCODE_BONE_DONE);
- add_relation(solver_key, done_key, DEPSREL_TYPE_TRANSFORM, "IK Chain Result");
- }
- parchan->flag |= POSE_DONE;
-
- OperationKey final_transforms_key(&ob->id, DEPSNODE_TYPE_BONE, parchan->name, DEG_OPCODE_BONE_DONE);
- add_relation(solver_key, final_transforms_key, DEPSREL_TYPE_TRANSFORM, "Spline IK Solver Result");
-
- root_map->add_bone(parchan->name, rootchan->name);
-
- /* continue up chain, until we reach target number of items... */
- segcount++;
- if ((segcount == data->chainlen) || (segcount > 255)) break; /* 255 is weak */
- }
-
- OperationKey flush_key(&ob->id, DEPSNODE_TYPE_EVAL_POSE, DEG_OPCODE_POSE_DONE);
- add_relation(solver_key, flush_key, DEPSREL_TYPE_OPERATION, "PoseEval Result-Bone Link");
-}
-
-/* Pose/Armature Bones Graph */
-void DepsgraphRelationBuilder::build_rig(Scene *scene, Object *ob)
-{
- /* Armature-Data */
- bArmature *arm = (bArmature *)ob->data;
-
- // TODO: selection status?
-
- /* attach links between pose operations */
- OperationKey init_key(&ob->id, DEPSNODE_TYPE_EVAL_POSE, DEG_OPCODE_POSE_INIT);
- OperationKey flush_key(&ob->id, DEPSNODE_TYPE_EVAL_POSE, DEG_OPCODE_POSE_DONE);
-
- add_relation(init_key, flush_key, DEPSREL_TYPE_COMPONENT_ORDER, "[Pose Init -> Pose Cleanup]");
-
- /* Make sure pose is up-to-date with armature updates. */
- OperationKey armature_key(&arm->id,
- DEPSNODE_TYPE_PARAMETERS,
- DEG_OPCODE_PLACEHOLDER,
- "Armature Eval");
- add_relation(armature_key, init_key, DEPSREL_TYPE_COMPONENT_ORDER, "Data dependency");
-
- if (ob->adt && (ob->adt->action || ob->adt->nla_tracks.first)) {
- ComponentKey animation_key(&ob->id, DEPSNODE_TYPE_ANIMATION);
- add_relation(animation_key, init_key, DEPSREL_TYPE_OPERATION, "Rig Animation");
- }
-
- /* IK Solvers...
- * - These require separate processing steps are pose-level
- * to be executed between chains of bones (i.e. once the
- * base transforms of a bunch of bones is done)
- *
- * - We build relations for these before the dependencies
- * between ops in the same component as it is necessary
- * to check whether such bones are in the same IK chain
- * (or else we get weird issues with either in-chain
- * references, or with bones being parented to IK'd bones)
- *
- * Unsolved Issues:
- * - Care is needed to ensure that multi-headed trees work out the same as in ik-tree building
- * - Animated chain-lengths are a problem...
- */
- RootPChanMap root_map;
- bool pose_depends_on_local_transform = false;
- for (bPoseChannel *pchan = (bPoseChannel *)ob->pose->chanbase.first; pchan; pchan = pchan->next) {
- for (bConstraint *con = (bConstraint *)pchan->constraints.first; con; con = con->next) {
- switch (con->type) {
- case CONSTRAINT_TYPE_KINEMATIC:
- build_ik_pose(ob, pchan, con, &root_map);
- pose_depends_on_local_transform = true;
- break;
-
- case CONSTRAINT_TYPE_SPLINEIK:
- build_splineik_pose(ob, pchan, con, &root_map);
- pose_depends_on_local_transform = true;
- break;
-
- /* Constraints which needs world's matrix for transform.
- * TODO(sergey): More constraints here?
- */
- case CONSTRAINT_TYPE_ROTLIKE:
- case CONSTRAINT_TYPE_SIZELIKE:
- case CONSTRAINT_TYPE_LOCLIKE:
- case CONSTRAINT_TYPE_TRANSLIKE:
- /* TODO(sergey): Add used space check. */
- pose_depends_on_local_transform = true;
- break;
-
- default:
- break;
- }
- }
- }
- //root_map.print_debug();
-
- if (pose_depends_on_local_transform) {
- /* TODO(sergey): Once partial updates are possible use relation between
- * object transform and solver itself in it's build function.
- */
- ComponentKey pose_key(&ob->id, DEPSNODE_TYPE_EVAL_POSE);
- ComponentKey local_transform_key(&ob->id, DEPSNODE_TYPE_TRANSFORM);
- add_relation(local_transform_key, pose_key, DEPSREL_TYPE_TRANSFORM, "Local Transforms");
- }
-
-
- /* links between operations for each bone */
- for (bPoseChannel *pchan = (bPoseChannel *)ob->pose->chanbase.first; pchan; pchan = pchan->next) {
- OperationKey bone_local_key(&ob->id, DEPSNODE_TYPE_BONE, pchan->name, DEG_OPCODE_BONE_LOCAL);
- OperationKey bone_pose_key(&ob->id, DEPSNODE_TYPE_BONE, pchan->name, DEG_OPCODE_BONE_POSE_PARENT);
- OperationKey bone_ready_key(&ob->id, DEPSNODE_TYPE_BONE, pchan->name, DEG_OPCODE_BONE_READY);
- OperationKey bone_done_key(&ob->id, DEPSNODE_TYPE_BONE, pchan->name, DEG_OPCODE_BONE_DONE);
-
- pchan->flag &= ~POSE_DONE;
-
- /* pose init to bone local */
- add_relation(init_key, bone_local_key, DEPSREL_TYPE_OPERATION, "PoseEval Source-Bone Link");
-
- /* local to pose parenting operation */
- add_relation(bone_local_key, bone_pose_key, DEPSREL_TYPE_OPERATION, "Bone Local - PoseSpace Link");
-
- /* parent relation */
- if (pchan->parent != NULL) {
- eDepsOperation_Code parent_key_opcode;
-
- /* NOTE: this difference in handling allows us to prevent lockups while ensuring correct poses for separate chains */
- if (root_map.has_common_root(pchan->name, pchan->parent->name)) {
- parent_key_opcode = DEG_OPCODE_BONE_READY;
- }
- else {
- parent_key_opcode = DEG_OPCODE_BONE_DONE;
- }
-
- OperationKey parent_key(&ob->id, DEPSNODE_TYPE_BONE, pchan->parent->name, parent_key_opcode);
- add_relation(parent_key, bone_pose_key, DEPSREL_TYPE_TRANSFORM, "[Parent Bone -> Child Bone]");
- }
-
- /* constraints */
- if (pchan->constraints.first != NULL) {
- /* constraints stack and constraint dependencies */
- build_constraints(scene, &ob->id, DEPSNODE_TYPE_BONE, pchan->name, &pchan->constraints, &root_map);
-
- /* pose -> constraints */
- OperationKey constraints_key(&ob->id, DEPSNODE_TYPE_BONE, pchan->name, DEG_OPCODE_BONE_CONSTRAINTS);
- add_relation(bone_pose_key, constraints_key, DEPSREL_TYPE_OPERATION, "Constraints Stack");
-
- /* constraints -> ready */
- // TODO: when constraint stack is exploded, this step should occur before the first IK solver
- add_relation(constraints_key, bone_ready_key, DEPSREL_TYPE_OPERATION, "Constraints -> Ready");
- }
- else {
- /* pose -> ready */
- add_relation(bone_pose_key, bone_ready_key, DEPSREL_TYPE_OPERATION, "Pose -> Ready");
- }
-
- /* bone ready -> done
- * NOTE: For bones without IK, this is all that's needed.
- * For IK chains however, an additional rel is created from IK to done,
- * with transitive reduction removing this one...
- */
- add_relation(bone_ready_key, bone_done_key, DEPSREL_TYPE_OPERATION, "Ready -> Done");
-
- /* assume that all bones must be done for the pose to be ready (for deformers) */
- add_relation(bone_done_key, flush_key, DEPSREL_TYPE_OPERATION, "PoseEval Result-Bone Link");
- }
-}
-
-void DepsgraphRelationBuilder::build_proxy_rig(Object *ob)
-{
- OperationKey pose_init_key(&ob->id, DEPSNODE_TYPE_EVAL_POSE, DEG_OPCODE_POSE_INIT);
- OperationKey pose_done_key(&ob->id, DEPSNODE_TYPE_EVAL_POSE, DEG_OPCODE_POSE_DONE);
- for (bPoseChannel *pchan = (bPoseChannel *)ob->pose->chanbase.first;
- pchan != NULL;
- pchan = pchan->next)
- {
- OperationKey bone_local_key(&ob->id, DEPSNODE_TYPE_BONE, pchan->name, DEG_OPCODE_BONE_LOCAL);
- OperationKey bone_ready_key(&ob->id, DEPSNODE_TYPE_BONE, pchan->name, DEG_OPCODE_BONE_READY);
- OperationKey bone_done_key(&ob->id, DEPSNODE_TYPE_BONE, pchan->name, DEG_OPCODE_BONE_DONE);
- add_relation(pose_init_key, bone_local_key, DEPSREL_TYPE_OPERATION, "Pose Init -> Bone Local");
- add_relation(bone_local_key, bone_ready_key, DEPSREL_TYPE_OPERATION, "Local -> Ready");
- add_relation(bone_ready_key, bone_done_key, DEPSREL_TYPE_OPERATION, "Ready -> Done");
- add_relation(bone_done_key, pose_done_key, DEPSREL_TYPE_OPERATION, "Bone Done -> Pose Done");
- }
-}
-
/* Shapekeys */
void DepsgraphRelationBuilder::build_shapekeys(ID *obdata, Key *key)
{
@@ -1701,10 +1356,9 @@ void DepsgraphRelationBuilder::build_obdata_geom(Main *bmain, Scene *scene, Obje
/* Modifiers */
if (ob->modifiers.first) {
- ModifierData *md;
OperationKey prev_mod_key;
- for (md = (ModifierData *)ob->modifiers.first; md; md = md->next) {
+ LINKLIST_FOREACH (ModifierData *, md, &ob->modifiers) {
const ModifierTypeInfo *mti = modifierType_getInfo((ModifierType)md->type);
OperationKey mod_key(&ob->id, DEPSNODE_TYPE_GEOMETRY, DEG_OPCODE_GEOMETRY_MODIFIER, md->name);
@@ -1737,7 +1391,7 @@ void DepsgraphRelationBuilder::build_obdata_geom(Main *bmain, Scene *scene, Obje
* for either the modifier needing time, or that it is animated.
*/
/* XXX: Remove this hack when these links are added as part of build_animdata() instead */
- if (modifier_dependsOnTime(md) == false) {
+ if (modifier_dependsOnTime(md) == false && needs_animdata_node(&ob->id)) {
ComponentKey animation_key(&ob->id, DEPSNODE_TYPE_ANIMATION);
add_relation(animation_key, mod_key, DEPSREL_TYPE_OPERATION, "Modifier Animation");
}
@@ -1820,15 +1474,18 @@ void DepsgraphRelationBuilder::build_obdata_geom(Main *bmain, Scene *scene, Obje
// XXX: these needs geom data, but where is geom stored?
if (cu->bevobj) {
ComponentKey bevob_key(&cu->bevobj->id, DEPSNODE_TYPE_GEOMETRY);
+ build_object(bmain, scene, cu->bevobj);
add_relation(bevob_key, geom_key, DEPSREL_TYPE_GEOMETRY_EVAL, "Curve Bevel");
}
if (cu->taperobj) {
ComponentKey taperob_key(&cu->taperobj->id, DEPSNODE_TYPE_GEOMETRY);
+ build_object(bmain, scene, cu->taperobj);
add_relation(taperob_key, geom_key, DEPSREL_TYPE_GEOMETRY_EVAL, "Curve Taper");
}
if (ob->type == OB_FONT) {
if (cu->textoncurve) {
- ComponentKey textoncurve_key(&cu->taperobj->id, DEPSNODE_TYPE_GEOMETRY);
+ ComponentKey textoncurve_key(&cu->textoncurve->id, DEPSNODE_TYPE_GEOMETRY);
+ build_object(bmain, scene, cu->textoncurve);
add_relation(textoncurve_key, geom_key, DEPSREL_TYPE_GEOMETRY_EVAL, "Text on Curve");
}
}
@@ -1936,7 +1593,7 @@ void DepsgraphRelationBuilder::build_nodetree(ID *owner, bNodeTree *ntree)
"Parameters Eval");
/* nodetree's nodes... */
- for (bNode *bnode = (bNode *)ntree->nodes.first; bnode; bnode = bnode->next) {
+ LINKLIST_FOREACH (bNode *, bnode, &ntree->nodes) {
if (bnode->id) {
if (GS(bnode->id->name) == ID_MA) {
build_material(owner, (Material *)bnode->id);
@@ -2035,9 +1692,26 @@ bool DepsgraphRelationBuilder::needs_animdata_node(ID *id)
{
AnimData *adt = BKE_animdata_from_id(id);
if (adt != NULL) {
- return adt->action != NULL;
+ return (adt->action != NULL) || (adt->nla_tracks.first != NULL);
}
return false;
}
+void DepsgraphRelationBuilder::build_cachefile(CacheFile *cache_file) {
+ /* Animation. */
+ build_animdata(&cache_file->id);
+}
+
+void DepsgraphRelationBuilder::build_mask(Mask *mask)
+{
+ /* Animation. */
+ build_animdata(&mask->id);
+}
+
+void DepsgraphRelationBuilder::build_movieclip(MovieClip *clip)
+{
+ /* Animation. */
+ build_animdata(&clip->id);
+}
+
} // namespace DEG
diff --git a/source/blender/depsgraph/intern/builder/deg_builder_relations.h b/source/blender/depsgraph/intern/builder/deg_builder_relations.h
index 46e65d4..6e8485b 100644
--- a/source/blender/depsgraph/intern/builder/deg_builder_relations.h
+++ b/source/blender/depsgraph/intern/builder/deg_builder_relations.h
@@ -47,6 +47,7 @@
struct Base;
struct bGPdata;
+struct CacheFile;
struct ListBase;
struct GHash;
struct ID;
@@ -54,8 +55,10 @@ struct FCurve;
struct Group;
struct Key;
struct Main;
+struct Mask;
struct Material;
struct MTex;
+struct MovieClip;
struct bNodeTree;
struct Object;
struct bPoseChannel;
@@ -81,108 +84,83 @@ struct ComponentDepsNode;
struct OperationDepsNode;
struct RootPChanMap;
-struct RootKey
-{
- RootKey() {}
+struct RootKey {
+ RootKey();
};
struct TimeSourceKey
{
- TimeSourceKey() : id(NULL) {}
- TimeSourceKey(ID *id) : id(id) {}
+ TimeSourceKey();
+ TimeSourceKey(ID *id);
- string identifier() const
- {
- return string("TimeSourceKey");
- }
+ string identifier() const;
ID *id;
};
struct ComponentKey
{
- ComponentKey() :
- id(NULL), type(DEPSNODE_TYPE_UNDEFINED), name("")
- {}
- ComponentKey(ID *id, eDepsNode_Type type, const string &name = "") :
- id(id), type(type), name(name)
- {}
-
- string identifier() const
- {
- const char *idname = (id) ? id->name : "<None>";
+ ComponentKey();
+ ComponentKey(ID *id, eDepsNode_Type type, const char *name = "");
- char typebuf[5];
- BLI_snprintf(typebuf, sizeof(typebuf), "%d", type);
-
- return string("ComponentKey(") + idname + ", " + typebuf + ", '" + name + "')";
- }
+ string identifier() const;
ID *id;
eDepsNode_Type type;
- string name;
+ const char *name;
};
struct OperationKey
{
- OperationKey() :
- id(NULL), component_type(DEPSNODE_TYPE_UNDEFINED), component_name(""), opcode(DEG_OPCODE_OPERATION), name("")
- {}
-
- OperationKey(ID *id, eDepsNode_Type component_type, const string &name) :
- id(id), component_type(component_type), component_name(""), opcode(DEG_OPCODE_OPERATION), name(name)
- {}
- OperationKey(ID *id, eDepsNode_Type component_type, const string &component_name, const string &name) :
- id(id), component_type(component_type), component_name(component_name), opcode(DEG_OPCODE_OPERATION), name(name)
- {}
-
- OperationKey(ID *id, eDepsNode_Type component_type, eDepsOperation_Code opcode) :
- id(id), component_type(component_type), component_name(""), opcode(opcode), name("")
- {}
- OperationKey(ID *id, eDepsNode_Type component_type, const string &component_name, eDepsOperation_Code opcode) :
- id(id), component_type(component_type), component_name(component_name), opcode(opcode), name("")
- {}
-
- OperationKey(ID *id, eDepsNode_Type component_type, eDepsOperation_Code opcode, const string &name) :
- id(id), component_type(component_type), component_name(""), opcode(opcode), name(name)
- {}
- OperationKey(ID *id, eDepsNode_Type component_type, const string &component_name, eDepsOperation_Code opcode, const string &name) :
- id(id), component_type(component_type), component_name(component_name), opcode(opcode), name(name)
- {}
-
- string identifier() const
- {
- char typebuf[5];
- BLI_snprintf(typebuf, sizeof(typebuf), "%d", component_type);
-
- return string("OperationKey(") + "t: " + typebuf + ", cn: '" + component_name + "', c: " + DEG_OPNAMES[opcode] + ", n: '" + name + "')";
- }
-
+ OperationKey();
+ OperationKey(ID *id,
+ eDepsNode_Type component_type,
+ const char *name,
+ int name_tag = -1);
+ OperationKey(ID *id,
+ eDepsNode_Type component_type,
+ const char *component_name,
+ const char *name,
+ int name_tag);
+
+ OperationKey(ID *id,
+ eDepsNode_Type component_type,
+ eDepsOperation_Code opcode);
+ OperationKey(ID *id,
+ eDepsNode_Type component_type,
+ const char *component_name,
+ eDepsOperation_Code opcode);
+
+ OperationKey(ID *id,
+ eDepsNode_Type component_type,
+ eDepsOperation_Code opcode,
+ const char *name,
+ int name_tag = -1);
+ OperationKey(ID *id,
+ eDepsNode_Type component_type,
+ const char *component_name,
+ eDepsOperation_Code opcode,
+ const char *name,
+ int name_tag = -1);
+
+ string identifier() const;
ID *id;
eDepsNode_Type component_type;
- string component_name;
+ const char *component_name;
eDepsOperation_Code opcode;
- string name;
+ const char *name;
+ int name_tag;
};
struct RNAPathKey
{
- // Note: see depsgraph_build.cpp for implementation
+ /* NOTE: see depsgraph_build.cpp for implementation */
RNAPathKey(ID *id, const char *path);
- RNAPathKey(ID *id, const PointerRNA &ptr, PropertyRNA *prop) :
- id(id), ptr(ptr), prop(prop)
- {}
-
- string identifier() const
- {
- const char *id_name = (id) ? id->name : "<No ID>";
- const char *prop_name = (prop) ? RNA_property_identifier(prop) : "<No Prop>";
-
- return string("RnaPathKey(") + "id: " + id_name + ", prop: " + prop_name + "')";
- }
+ RNAPathKey(ID *id, const PointerRNA &ptr, PropertyRNA *prop);
+ string identifier() const;
ID *id;
PointerRNA ptr;
@@ -245,6 +223,9 @@ struct DepsgraphRelationBuilder
void build_texture_stack(ID *owner, MTex **texture_stack);
void build_compositor(Scene *scene);
void build_gpencil(ID *owner, bGPdata *gpd);
+ void build_cachefile(CacheFile *cache_file);
+ void build_mask(Mask *mask);
+ void build_movieclip(MovieClip *clip);
void add_collision_relations(const OperationKey &key, Scene *scene, Object *ob, Group *group, int layer, bool dupli, const char *name);
void add_forcefield_relations(const OperationKey &key, Scene *scene, Object *ob, ParticleSystem *psys, EffectorWeights *eff, bool add_absorption, const char *name);
@@ -270,7 +251,7 @@ protected:
template <typename KeyType>
DepsNodeHandle create_node_handle(const KeyType& key,
- const string& default_name = "");
+ const char *default_name = "");
bool needs_animdata_node(ID *id);
@@ -280,7 +261,7 @@ private:
struct DepsNodeHandle
{
- DepsNodeHandle(DepsgraphRelationBuilder *builder, OperationDepsNode *node, const string &default_name = "") :
+ DepsNodeHandle(DepsgraphRelationBuilder *builder, OperationDepsNode *node, const char *default_name = "") :
builder(builder),
node(node),
default_name(default_name)
@@ -290,7 +271,7 @@ struct DepsNodeHandle
DepsgraphRelationBuilder *builder;
OperationDepsNode *node;
- const string &default_name;
+ const char *default_name;
};
/* Utilities for Builders ----------------------------------------------------- */
@@ -318,6 +299,7 @@ void DepsgraphRelationBuilder::add_relation(const KeyFrom &key_from,
else {
if (!op_from) {
/* XXX TODO handle as error or report if needed */
+ node_from = find_node(key_from);
fprintf(stderr, "add_relation(%d, %s) - Could not find op_from (%s)\n",
type, description, key_from.identifier().c_str());
}
@@ -370,10 +352,12 @@ void DepsgraphRelationBuilder::add_node_handle_relation(
}
else {
if (!op_from) {
- /* XXX TODO handle as error or report if needed */
+ fprintf(stderr, "add_node_handle_relation(%d, %s) - Could not find op_from (%s)\n",
+ type, description, key_from.identifier().c_str());
}
if (!op_to) {
- /* XXX TODO handle as error or report if needed */
+ fprintf(stderr, "add_node_handle_relation(%d, %s) - Could not find op_to (%s)\n",
+ type, description, key_from.identifier().c_str());
}
}
}
@@ -381,7 +365,7 @@ void DepsgraphRelationBuilder::add_node_handle_relation(
template <typename KeyType>
DepsNodeHandle DepsgraphRelationBuilder::create_node_handle(
const KeyType &key,
- const string &default_name)
+ const char *default_name)
{
return DepsNodeHandle(this, find_node(key), default_name);
}
diff --git a/source/blender/depsgraph/intern/builder/deg_builder_relations_keys.cc b/source/blender/depsgraph/intern/builder/deg_builder_relations_keys.cc
new file mode 100644
index 0000000..feae8bc
--- /dev/null
+++ b/source/blender/depsgraph/intern/builder/deg_builder_relations_keys.cc
@@ -0,0 +1,211 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2013 Blender Foundation.
+ * All rights reserved.
+ *
+ * Original Author: Joshua Leung
+ * Contributor(s): Based on original depsgraph.c code - Blender Foundation (2005-2013)
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/depsgraph/intern/builder/deg_builder_relations_keys.cc
+ * \ingroup depsgraph
+ *
+ * Methods for constructing depsgraph
+ */
+
+#include "intern/builder/deg_builder_relations.h"
+
+namespace DEG {
+
+/////////////////////////////////////////
+// Root.
+
+RootKey::RootKey()
+{
+}
+
+/////////////////////////////////////////
+// Time source.
+
+TimeSourceKey::TimeSourceKey()
+ : id(NULL)
+{
+}
+
+TimeSourceKey::TimeSourceKey(ID *id)
+ : id(id)
+{
+}
+
+string TimeSourceKey::identifier() const
+{
+ return string("TimeSourceKey");
+}
+
+/////////////////////////////////////////
+// Component.
+
+ComponentKey::ComponentKey()
+ : id(NULL),
+ type(DEPSNODE_TYPE_UNDEFINED),
+ name("")
+{
+}
+
+ComponentKey::ComponentKey(ID *id, eDepsNode_Type type, const char *name)
+ : id(id),
+ type(type),
+ name(name)
+{
+}
+
+string ComponentKey::identifier() const
+{
+ const char *idname = (id) ? id->name : "<None>";
+ char typebuf[5];
+ BLI_snprintf(typebuf, sizeof(typebuf), "%d", type);
+ return string("ComponentKey(") +
+ idname + ", " + typebuf + ", '" + name + "')";
+}
+
+/////////////////////////////////////////
+// Operation.
+
+OperationKey::OperationKey()
+ : id(NULL),
+ component_type(DEPSNODE_TYPE_UNDEFINED),
+ component_name(""),
+ opcode(DEG_OPCODE_OPERATION),
+ name(""),
+ name_tag(-1)
+{
+}
+
+OperationKey::OperationKey(ID *id,
+ eDepsNode_Type component_type,
+ const char *name,
+ int name_tag)
+ : id(id),
+ component_type(component_type),
+ component_name(""),
+ opcode(DEG_OPCODE_OPERATION),
+ name(name),
+ name_tag(name_tag)
+{
+}
+
+OperationKey::OperationKey(ID *id,
+ eDepsNode_Type component_type,
+ const char *component_name,
+ const char *name,
+ int name_tag)
+ : id(id),
+ component_type(component_type),
+ component_name(component_name),
+ opcode(DEG_OPCODE_OPERATION),
+ name(name),
+ name_tag(name_tag)
+{
+}
+
+OperationKey::OperationKey(ID *id,
+ eDepsNode_Type component_type,
+ eDepsOperation_Code opcode)
+ : id(id),
+ component_type(component_type),
+ component_name(""),
+ opcode(opcode),
+ name(""),
+ name_tag(-1)
+{
+}
+
+OperationKey::OperationKey(ID *id,
+ eDepsNode_Type component_type,
+ const char *component_name,
+ eDepsOperation_Code opcode)
+ : id(id),
+ component_type(component_type),
+ component_name(component_name),
+ opcode(opcode),
+ name(""),
+ name_tag(-1)
+{
+}
+
+OperationKey::OperationKey(ID *id,
+ eDepsNode_Type component_type,
+ eDepsOperation_Code opcode,
+ const char *name,
+ int name_tag)
+ : id(id),
+ component_type(component_type),
+ component_name(""),
+ opcode(opcode),
+ name(name),
+ name_tag(name_tag)
+{
+}
+
+OperationKey::OperationKey(ID *id,
+ eDepsNode_Type component_type,
+ const char *component_name,
+ eDepsOperation_Code opcode,
+ const char *name,
+ int name_tag)
+ : id(id),
+ component_type(component_type),
+ component_name(component_name),
+ opcode(opcode),
+ name(name),
+ name_tag(name_tag)
+{
+}
+
+string OperationKey::identifier() const
+{
+ char typebuf[5];
+ BLI_snprintf(typebuf, sizeof(typebuf), "%d", component_type);
+ return string("OperationKey(") +
+ "t: " + typebuf +
+ ", cn: '" + component_name +
+ "', c: " + DEG_OPNAMES[opcode] +
+ ", n: '" + name + "')";
+}
+
+/////////////////////////////////////////
+// RNA path.
+
+RNAPathKey::RNAPathKey(ID *id, const PointerRNA &ptr, PropertyRNA *prop)
+ : id(id),
+ ptr(ptr),
+ prop(prop)
+{
+}
+
+string RNAPathKey::identifier() const
+{
+ const char *id_name = (id) ? id->name : "<No ID>";
+ const char *prop_name = (prop) ? RNA_property_identifier(prop) : "<No Prop>";
+ return string("RnaPathKey(") + "id: " + id_name +
+ ", prop: " + prop_name + "')";
+}
+
+} // namespace DEG
diff --git a/source/blender/depsgraph/intern/builder/deg_builder_relations_rig.cc b/source/blender/depsgraph/intern/builder/deg_builder_relations_rig.cc
new file mode 100644
index 0000000..2b4c000
--- /dev/null
+++ b/source/blender/depsgraph/intern/builder/deg_builder_relations_rig.cc
@@ -0,0 +1,455 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2013 Blender Foundation.
+ * All rights reserved.
+ *
+ * Original Author: Joshua Leung
+ * Contributor(s): Based on original depsgraph.c code - Blender Foundation (2005-2013)
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/depsgraph/intern/builder/deg_builder_relations_rig.cc
+ * \ingroup depsgraph
+ *
+ * Methods for constructing depsgraph
+ */
+
+#include "intern/builder/deg_builder_relations.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <cstring> /* required for STREQ later on. */
+
+#include "MEM_guardedalloc.h"
+
+extern "C" {
+#include "BLI_blenlib.h"
+#include "BLI_utildefines.h"
+
+#include "DNA_action_types.h"
+#include "DNA_anim_types.h"
+#include "DNA_armature_types.h"
+#include "DNA_constraint_types.h"
+#include "DNA_customdata_types.h"
+#include "DNA_object_types.h"
+
+#include "BKE_action.h"
+#include "BKE_armature.h"
+
+#include "DEG_depsgraph.h"
+#include "DEG_depsgraph_build.h"
+} /* extern "C" */
+
+#include "intern/builder/deg_builder.h"
+#include "intern/builder/deg_builder_pchanmap.h"
+
+#include "intern/nodes/deg_node.h"
+#include "intern/nodes/deg_node_component.h"
+#include "intern/nodes/deg_node_operation.h"
+
+#include "intern/depsgraph_intern.h"
+#include "intern/depsgraph_types.h"
+
+#include "util/deg_util_foreach.h"
+
+namespace DEG {
+
+/* IK Solver Eval Steps */
+void DepsgraphRelationBuilder::build_ik_pose(Object *ob,
+ bPoseChannel *pchan,
+ bConstraint *con,
+ RootPChanMap *root_map)
+{
+ bKinematicConstraint *data = (bKinematicConstraint *)con->data;
+
+ /* attach owner to IK Solver too
+ * - assume that owner is always part of chain
+ * - see notes on direction of rel below...
+ */
+ bPoseChannel *rootchan = BKE_armature_ik_solver_find_root(pchan, data);
+ OperationKey solver_key(&ob->id, DEPSNODE_TYPE_EVAL_POSE, rootchan->name, DEG_OPCODE_POSE_IK_SOLVER);
+
+ /* IK target */
+ // XXX: this should get handled as part of the constraint code
+ if (data->tar != NULL) {
+ /* TODO(sergey): Until we can store partial matrices in the depsgraph,
+ * we create a dependency between the target object and the pose eval component.
+ *
+ * This way we ensure the whole subtree is updated from scratch without
+ * the need for intermediate matrices. This is overkill, but good enough for
+ * testing the IK solver.
+ */
+ // FIXME: geometry targets...
+ ComponentKey pose_key(&ob->id, DEPSNODE_TYPE_EVAL_POSE);
+ if ((data->tar->type == OB_ARMATURE) && (data->subtarget[0])) {
+ /* TODO(sergey): This is only needed until granular updates store intermediate results. */
+ if (data->tar != ob) {
+ /* different armature - can just read the results */
+ ComponentKey target_key(&data->tar->id, DEPSNODE_TYPE_BONE, data->subtarget);
+ add_relation(target_key, pose_key, DEPSREL_TYPE_TRANSFORM, con->name);
+ }
+ else {
+ /* same armature - we'll use the ready state only, just in case this bone is in the chain we're solving */
+ OperationKey target_key(&data->tar->id, DEPSNODE_TYPE_BONE, data->subtarget, DEG_OPCODE_BONE_DONE);
+ add_relation(target_key, solver_key, DEPSREL_TYPE_TRANSFORM, con->name);
+ }
+ }
+ else if (ELEM(data->tar->type, OB_MESH, OB_LATTICE) && (data->subtarget[0])) {
+ /* vertex group target */
+ /* NOTE: for now, we don't need to represent vertex groups separately... */
+ ComponentKey target_key(&data->tar->id, DEPSNODE_TYPE_GEOMETRY);
+ add_relation(target_key, solver_key, DEPSREL_TYPE_GEOMETRY_EVAL, con->name);
+
+ if (data->tar->type == OB_MESH) {
+ OperationDepsNode *node2 = find_operation_node(target_key);
+ if (node2 != NULL) {
+ node2->customdata_mask |= CD_MASK_MDEFORMVERT;
+ }
+ }
+ }
+ else {
+ /* Standard Object Target */
+ ComponentKey target_key(&data->tar->id, DEPSNODE_TYPE_TRANSFORM);
+ add_relation(target_key, pose_key, DEPSREL_TYPE_TRANSFORM, con->name);
+ }
+
+ if ((data->tar == ob) && (data->subtarget[0])) {
+ /* Prevent target's constraints from linking to anything from same
+ * chain that it controls.
+ */
+ root_map->add_bone(data->subtarget, rootchan->name);
+ }
+ }
+
+ /* Pole Target */
+ // XXX: this should get handled as part of the constraint code
+ if (data->poletar != NULL) {
+ if ((data->poletar->type == OB_ARMATURE) && (data->polesubtarget[0])) {
+ // XXX: same armature issues - ready vs done?
+ ComponentKey target_key(&data->poletar->id, DEPSNODE_TYPE_BONE, data->polesubtarget);
+ add_relation(target_key, solver_key, DEPSREL_TYPE_TRANSFORM, con->name);
+ }
+ else if (ELEM(data->poletar->type, OB_MESH, OB_LATTICE) && (data->polesubtarget[0])) {
+ /* vertex group target */
+ /* NOTE: for now, we don't need to represent vertex groups separately... */
+ ComponentKey target_key(&data->poletar->id, DEPSNODE_TYPE_GEOMETRY);
+ add_relation(target_key, solver_key, DEPSREL_TYPE_GEOMETRY_EVAL, con->name);
+
+ if (data->poletar->type == OB_MESH) {
+ OperationDepsNode *node2 = find_operation_node(target_key);
+ if (node2 != NULL) {
+ node2->customdata_mask |= CD_MASK_MDEFORMVERT;
+ }
+ }
+ }
+ else {
+ ComponentKey target_key(&data->poletar->id, DEPSNODE_TYPE_TRANSFORM);
+ add_relation(target_key, solver_key, DEPSREL_TYPE_TRANSFORM, con->name);
+ }
+ }
+
+ DEG_DEBUG_PRINTF("\nStarting IK Build: pchan = %s, target = (%s, %s), segcount = %d\n",
+ pchan->name, data->tar->id.name, data->subtarget, data->rootbone);
+
+ bPoseChannel *parchan = pchan;
+ /* exclude tip from chain? */
+ if (!(data->flag & CONSTRAINT_IK_TIP)) {
+ OperationKey tip_transforms_key(&ob->id, DEPSNODE_TYPE_BONE,
+ parchan->name, DEG_OPCODE_BONE_LOCAL);
+ add_relation(solver_key, tip_transforms_key,
+ DEPSREL_TYPE_TRANSFORM, "IK Solver Result");
+ parchan = pchan->parent;
+ }
+
+ root_map->add_bone(parchan->name, rootchan->name);
+
+ OperationKey parchan_transforms_key(&ob->id, DEPSNODE_TYPE_BONE,
+ parchan->name, DEG_OPCODE_BONE_READY);
+ add_relation(parchan_transforms_key, solver_key,
+ DEPSREL_TYPE_TRANSFORM, "IK Solver Owner");
+
+ /* Walk to the chain's root */
+ //size_t segcount = 0;
+ int segcount = 0;
+
+ while (parchan) {
+ /* Make the IK solver dependent on this bone's result,
+ * since it can only run after the standard results
+ * of the bone are known. The validate-links step on the
+ * bone will ensure that users of this bone only
+ * grab the result with IK solver results...
+ */
+ if (parchan != pchan) {
+ OperationKey parent_key(&ob->id, DEPSNODE_TYPE_BONE, parchan->name, DEG_OPCODE_BONE_READY);
+ add_relation(parent_key, solver_key, DEPSREL_TYPE_TRANSFORM, "IK Chain Parent");
+
+ OperationKey done_key(&ob->id, DEPSNODE_TYPE_BONE, parchan->name, DEG_OPCODE_BONE_DONE);
+ add_relation(solver_key, done_key, DEPSREL_TYPE_TRANSFORM, "IK Chain Result");
+ }
+ else {
+ OperationKey final_transforms_key(&ob->id, DEPSNODE_TYPE_BONE, parchan->name, DEG_OPCODE_BONE_DONE);
+ add_relation(solver_key, final_transforms_key, DEPSREL_TYPE_TRANSFORM, "IK Solver Result");
+ }
+ parchan->flag |= POSE_DONE;
+
+
+ root_map->add_bone(parchan->name, rootchan->name);
+
+ /* continue up chain, until we reach target number of items... */
+ DEG_DEBUG_PRINTF(" %d = %s\n", segcount, parchan->name);
+ segcount++;
+ if ((segcount == data->rootbone) || (segcount > 255)) break; /* 255 is weak */
+
+ parchan = parchan->parent;
+ }
+
+ OperationKey flush_key(&ob->id, DEPSNODE_TYPE_EVAL_POSE, DEG_OPCODE_POSE_DONE);
+ add_relation(solver_key, flush_key, DEPSREL_TYPE_OPERATION, "PoseEval Result-Bone Link");
+}
+
+/* Spline IK Eval Steps */
+void DepsgraphRelationBuilder::build_splineik_pose(Object *ob,
+ bPoseChannel *pchan,
+ bConstraint *con,
+ RootPChanMap *root_map)
+{
+ bSplineIKConstraint *data = (bSplineIKConstraint *)con->data;
+ bPoseChannel *rootchan = BKE_armature_splineik_solver_find_root(pchan, data);
+ OperationKey transforms_key(&ob->id, DEPSNODE_TYPE_BONE, pchan->name, DEG_OPCODE_BONE_READY);
+ OperationKey solver_key(&ob->id, DEPSNODE_TYPE_EVAL_POSE, rootchan->name, DEG_OPCODE_POSE_SPLINE_IK_SOLVER);
+
+ /* attach owner to IK Solver too
+ * - assume that owner is always part of chain
+ * - see notes on direction of rel below...
+ */
+ add_relation(transforms_key, solver_key, DEPSREL_TYPE_TRANSFORM, "Spline IK Solver Owner");
+
+ /* attach path dependency to solver */
+ if (data->tar) {
+ /* TODO(sergey): Until we can store partial matrices in the depsgraph,
+ * we create a dependency between the target object and the pose eval component.
+ * See IK pose for a bit more information.
+ */
+ // TODO: the biggest point here is that we need the curve PATH and not just the general geometry...
+ ComponentKey target_key(&data->tar->id, DEPSNODE_TYPE_GEOMETRY);
+ ComponentKey pose_key(&ob->id, DEPSNODE_TYPE_EVAL_POSE);
+ add_relation(target_key, pose_key, DEPSREL_TYPE_TRANSFORM, "[Curve.Path -> Spline IK] DepsRel");
+ }
+
+ pchan->flag |= POSE_DONE;
+ OperationKey final_transforms_key(&ob->id, DEPSNODE_TYPE_BONE, pchan->name, DEG_OPCODE_BONE_DONE);
+ add_relation(solver_key, final_transforms_key, DEPSREL_TYPE_TRANSFORM, "Spline IK Result");
+
+ root_map->add_bone(pchan->name, rootchan->name);
+
+ /* Walk to the chain's root */
+ //size_t segcount = 0;
+ int segcount = 0;
+
+ for (bPoseChannel *parchan = pchan->parent; parchan; parchan = parchan->parent) {
+ /* Make the Spline IK solver dependent on this bone's result,
+ * since it can only run after the standard results
+ * of the bone are known. The validate-links step on the
+ * bone will ensure that users of this bone only
+ * grab the result with IK solver results...
+ */
+ if (parchan != pchan) {
+ OperationKey parent_key(&ob->id, DEPSNODE_TYPE_BONE, parchan->name, DEG_OPCODE_BONE_READY);
+ add_relation(parent_key, solver_key, DEPSREL_TYPE_TRANSFORM, "Spline IK Solver Update");
+
+ OperationKey done_key(&ob->id, DEPSNODE_TYPE_BONE, parchan->name, DEG_OPCODE_BONE_DONE);
+ add_relation(solver_key, done_key, DEPSREL_TYPE_TRANSFORM, "IK Chain Result");
+ }
+ parchan->flag |= POSE_DONE;
+
+ OperationKey final_transforms_key(&ob->id, DEPSNODE_TYPE_BONE, parchan->name, DEG_OPCODE_BONE_DONE);
+ add_relation(solver_key, final_transforms_key, DEPSREL_TYPE_TRANSFORM, "Spline IK Solver Result");
+
+ root_map->add_bone(parchan->name, rootchan->name);
+
+ /* continue up chain, until we reach target number of items... */
+ segcount++;
+ if ((segcount == data->chainlen) || (segcount > 255)) break; /* 255 is weak */
+ }
+
+ OperationKey flush_key(&ob->id, DEPSNODE_TYPE_EVAL_POSE, DEG_OPCODE_POSE_DONE);
+ add_relation(solver_key, flush_key, DEPSREL_TYPE_OPERATION, "PoseEval Result-Bone Link");
+}
+
+/* Pose/Armature Bones Graph */
+void DepsgraphRelationBuilder::build_rig(Scene *scene, Object *ob)
+{
+ /* Armature-Data */
+ bArmature *arm = (bArmature *)ob->data;
+
+ // TODO: selection status?
+
+ /* attach links between pose operations */
+ OperationKey init_key(&ob->id, DEPSNODE_TYPE_EVAL_POSE, DEG_OPCODE_POSE_INIT);
+ OperationKey flush_key(&ob->id, DEPSNODE_TYPE_EVAL_POSE, DEG_OPCODE_POSE_DONE);
+
+ add_relation(init_key, flush_key, DEPSREL_TYPE_COMPONENT_ORDER, "[Pose Init -> Pose Cleanup]");
+
+ /* Make sure pose is up-to-date with armature updates. */
+ OperationKey armature_key(&arm->id,
+ DEPSNODE_TYPE_PARAMETERS,
+ DEG_OPCODE_PLACEHOLDER,
+ "Armature Eval");
+ add_relation(armature_key, init_key, DEPSREL_TYPE_COMPONENT_ORDER, "Data dependency");
+
+ if (needs_animdata_node(&ob->id)) {
+ ComponentKey animation_key(&ob->id, DEPSNODE_TYPE_ANIMATION);
+ add_relation(animation_key, init_key, DEPSREL_TYPE_OPERATION, "Rig Animation");
+ }
+
+ /* IK Solvers...
+ * - These require separate processing steps at pose-level
+ * to be executed between chains of bones (i.e. once the
+ * base transforms of a bunch of bones are done)
+ *
+ * - We build relations for these before the dependencies
+ * between ops in the same component as it is necessary
+ * to check whether such bones are in the same IK chain
+ * (or else we get weird issues with either in-chain
+ * references, or with bones being parented to IK'd bones)
+ *
+ * Unsolved Issues:
+ * - Care is needed to ensure that multi-headed trees work out the same as in ik-tree building
+ * - Animated chain-lengths are a problem...
+ */
+ RootPChanMap root_map;
+ bool pose_depends_on_local_transform = false;
+ LINKLIST_FOREACH (bPoseChannel *, pchan, &ob->pose->chanbase) {
+ LINKLIST_FOREACH (bConstraint *, con, &pchan->constraints) {
+ switch (con->type) {
+ case CONSTRAINT_TYPE_KINEMATIC:
+ build_ik_pose(ob, pchan, con, &root_map);
+ pose_depends_on_local_transform = true;
+ break;
+
+ case CONSTRAINT_TYPE_SPLINEIK:
+ build_splineik_pose(ob, pchan, con, &root_map);
+ pose_depends_on_local_transform = true;
+ break;
+
+ /* Constraints which need the world's matrix for transform.
+ * TODO(sergey): More constraints here?
+ */
+ case CONSTRAINT_TYPE_ROTLIKE:
+ case CONSTRAINT_TYPE_SIZELIKE:
+ case CONSTRAINT_TYPE_LOCLIKE:
+ case CONSTRAINT_TYPE_TRANSLIKE:
+ /* TODO(sergey): Add used space check. */
+ pose_depends_on_local_transform = true;
+ break;
+
+ default:
+ break;
+ }
+ }
+ }
+ //root_map.print_debug();
+
+ if (pose_depends_on_local_transform) {
+ /* TODO(sergey): Once partial updates are possible, use a relation between
+ * the object transform and the solver itself in its build function.
+ */
+ ComponentKey pose_key(&ob->id, DEPSNODE_TYPE_EVAL_POSE);
+ ComponentKey local_transform_key(&ob->id, DEPSNODE_TYPE_TRANSFORM);
+ add_relation(local_transform_key, pose_key, DEPSREL_TYPE_TRANSFORM, "Local Transforms");
+ }
+
+
+ /* links between operations for each bone */
+ LINKLIST_FOREACH (bPoseChannel *, pchan, &ob->pose->chanbase) {
+ OperationKey bone_local_key(&ob->id, DEPSNODE_TYPE_BONE, pchan->name, DEG_OPCODE_BONE_LOCAL);
+ OperationKey bone_pose_key(&ob->id, DEPSNODE_TYPE_BONE, pchan->name, DEG_OPCODE_BONE_POSE_PARENT);
+ OperationKey bone_ready_key(&ob->id, DEPSNODE_TYPE_BONE, pchan->name, DEG_OPCODE_BONE_READY);
+ OperationKey bone_done_key(&ob->id, DEPSNODE_TYPE_BONE, pchan->name, DEG_OPCODE_BONE_DONE);
+
+ pchan->flag &= ~POSE_DONE;
+
+ /* pose init to bone local */
+ add_relation(init_key, bone_local_key, DEPSREL_TYPE_OPERATION, "PoseEval Source-Bone Link");
+
+ /* local to pose parenting operation */
+ add_relation(bone_local_key, bone_pose_key, DEPSREL_TYPE_OPERATION, "Bone Local - PoseSpace Link");
+
+ /* parent relation */
+ if (pchan->parent != NULL) {
+ eDepsOperation_Code parent_key_opcode;
+
+ /* NOTE: this difference in handling allows us to prevent lockups while ensuring correct poses for separate chains */
+ if (root_map.has_common_root(pchan->name, pchan->parent->name)) {
+ parent_key_opcode = DEG_OPCODE_BONE_READY;
+ }
+ else {
+ parent_key_opcode = DEG_OPCODE_BONE_DONE;
+ }
+
+ OperationKey parent_key(&ob->id, DEPSNODE_TYPE_BONE, pchan->parent->name, parent_key_opcode);
+ add_relation(parent_key, bone_pose_key, DEPSREL_TYPE_TRANSFORM, "[Parent Bone -> Child Bone]");
+ }
+
+ /* constraints */
+ if (pchan->constraints.first != NULL) {
+ /* constraints stack and constraint dependencies */
+ build_constraints(scene, &ob->id, DEPSNODE_TYPE_BONE, pchan->name, &pchan->constraints, &root_map);
+
+ /* pose -> constraints */
+ OperationKey constraints_key(&ob->id, DEPSNODE_TYPE_BONE, pchan->name, DEG_OPCODE_BONE_CONSTRAINTS);
+ add_relation(bone_pose_key, constraints_key, DEPSREL_TYPE_OPERATION, "Constraints Stack");
+
+ /* constraints -> ready */
+ // TODO: when constraint stack is exploded, this step should occur before the first IK solver
+ add_relation(constraints_key, bone_ready_key, DEPSREL_TYPE_OPERATION, "Constraints -> Ready");
+ }
+ else {
+ /* pose -> ready */
+ add_relation(bone_pose_key, bone_ready_key, DEPSREL_TYPE_OPERATION, "Pose -> Ready");
+ }
+
+ /* bone ready -> done
+ * NOTE: For bones without IK, this is all that's needed.
+ * For IK chains however, an additional rel is created from IK to done,
+ * with transitive reduction removing this one...
+ */
+ add_relation(bone_ready_key, bone_done_key, DEPSREL_TYPE_OPERATION, "Ready -> Done");
+
+ /* assume that all bones must be done for the pose to be ready (for deformers) */
+ add_relation(bone_done_key, flush_key, DEPSREL_TYPE_OPERATION, "PoseEval Result-Bone Link");
+ }
+}
+
+void DepsgraphRelationBuilder::build_proxy_rig(Object *ob)
+{
+ OperationKey pose_init_key(&ob->id, DEPSNODE_TYPE_EVAL_POSE, DEG_OPCODE_POSE_INIT);
+ OperationKey pose_done_key(&ob->id, DEPSNODE_TYPE_EVAL_POSE, DEG_OPCODE_POSE_DONE);
+ LINKLIST_FOREACH (bPoseChannel *, pchan, &ob->pose->chanbase) {
+ OperationKey bone_local_key(&ob->id, DEPSNODE_TYPE_BONE, pchan->name, DEG_OPCODE_BONE_LOCAL);
+ OperationKey bone_ready_key(&ob->id, DEPSNODE_TYPE_BONE, pchan->name, DEG_OPCODE_BONE_READY);
+ OperationKey bone_done_key(&ob->id, DEPSNODE_TYPE_BONE, pchan->name, DEG_OPCODE_BONE_DONE);
+ add_relation(pose_init_key, bone_local_key, DEPSREL_TYPE_OPERATION, "Pose Init -> Bone Local");
+ add_relation(bone_local_key, bone_ready_key, DEPSREL_TYPE_OPERATION, "Local -> Ready");
+ add_relation(bone_ready_key, bone_done_key, DEPSREL_TYPE_OPERATION, "Ready -> Done");
+ add_relation(bone_done_key, pose_done_key, DEPSREL_TYPE_OPERATION, "Bone Done -> Pose Done");
+ }
+}
+
+} // namespace DEG
diff --git a/source/blender/depsgraph/intern/builder/deg_builder_relations_scene.cc b/source/blender/depsgraph/intern/builder/deg_builder_relations_scene.cc
new file mode 100644
index 0000000..6b51a95
--- /dev/null
+++ b/source/blender/depsgraph/intern/builder/deg_builder_relations_scene.cc
@@ -0,0 +1,162 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2013 Blender Foundation.
+ * All rights reserved.
+ *
+ * Original Author: Joshua Leung
+ * Contributor(s): Based on original depsgraph.c code - Blender Foundation (2005-2013)
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/depsgraph/intern/builder/deg_builder_relations_scene.cc
+ * \ingroup depsgraph
+ *
+ * Methods for constructing depsgraph
+ */
+
+#include "intern/builder/deg_builder_relations.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <cstring> /* required for STREQ later on. */
+
+#include "MEM_guardedalloc.h"
+
+extern "C" {
+#include "BLI_blenlib.h"
+#include "BLI_utildefines.h"
+
+#include "DNA_node_types.h"
+#include "DNA_object_types.h"
+#include "DNA_scene_types.h"
+
+#include "BKE_main.h"
+#include "BKE_node.h"
+
+#include "DEG_depsgraph.h"
+#include "DEG_depsgraph_build.h"
+} /* extern "C" */
+
+#include "intern/builder/deg_builder.h"
+#include "intern/builder/deg_builder_pchanmap.h"
+
+#include "intern/nodes/deg_node.h"
+#include "intern/nodes/deg_node_component.h"
+#include "intern/nodes/deg_node_operation.h"
+
+#include "intern/depsgraph_intern.h"
+#include "intern/depsgraph_types.h"
+
+#include "util/deg_util_foreach.h"
+
+namespace DEG {
+
+void DepsgraphRelationBuilder::build_scene(Main *bmain, Scene *scene)
+{
+ /* LIB_TAG_DOIT is used to indicate whether a node for the given ID has
+ * already been created.
+ */
+ BKE_main_id_tag_all(bmain, LIB_TAG_DOIT, false);
+ /* XXX nested node trees are not included in tag-clearing above,
+ * so we need to do this manually.
+ */
+ FOREACH_NODETREE(bmain, nodetree, id) {
+ if (id != (ID *)nodetree)
+ nodetree->id.tag &= ~LIB_TAG_DOIT;
+ } FOREACH_NODETREE_END
+
+ if (scene->set) {
+ // TODO: link set to scene, especially our timesource...
+ }
+
+ /* scene objects */
+ LINKLIST_FOREACH (Base *, base, &scene->base) {
+ Object *ob = base->object;
+
+ /* object itself */
+ build_object(bmain, scene, ob);
+
+ /* object that this is a proxy for */
+ if (ob->proxy) {
+ ob->proxy->proxy_from = ob;
+ build_object(bmain, scene, ob->proxy);
+ /* TODO(sergey): This is an inverted relation, matching old depsgraph
+ * behavior; it needs to be investigated whether it still needs to be inverted.
+ */
+ ComponentKey ob_pose_key(&ob->id, DEPSNODE_TYPE_EVAL_POSE);
+ ComponentKey proxy_pose_key(&ob->proxy->id, DEPSNODE_TYPE_EVAL_POSE);
+ add_relation(ob_pose_key, proxy_pose_key, DEPSREL_TYPE_TRANSFORM, "Proxy");
+ }
+
+ /* Object dupligroup. */
+ if (ob->dup_group) {
+ build_group(bmain, scene, ob, ob->dup_group);
+ }
+ }
+
+ /* rigidbody */
+ if (scene->rigidbody_world) {
+ build_rigidbody(scene);
+ }
+
+ /* scene's animation and drivers */
+ if (scene->adt) {
+ build_animdata(&scene->id);
+ }
+
+ /* world */
+ if (scene->world) {
+ build_world(scene->world);
+ }
+
+ /* compo nodes */
+ if (scene->nodetree) {
+ build_compositor(scene);
+ }
+
+ /* grease pencil */
+ if (scene->gpd) {
+ build_gpencil(&scene->id, scene->gpd);
+ }
+
+ /* Masks. */
+ LINKLIST_FOREACH (Mask *, mask, &bmain->mask) {
+ build_mask(mask);
+ }
+
+ /* Movie clips. */
+ LINKLIST_FOREACH (MovieClip *, clip, &bmain->movieclip) {
+ build_movieclip(clip);
+ }
+
+ for (Depsgraph::OperationNodes::const_iterator it_op = m_graph->operations.begin();
+ it_op != m_graph->operations.end();
+ ++it_op)
+ {
+ OperationDepsNode *node = *it_op;
+ IDDepsNode *id_node = node->owner->owner;
+ ID *id = id_node->id;
+ if (GS(id->name) == ID_OB) {
+ Object *object = (Object *)id;
+ object->customdata_mask |= node->customdata_mask;
+ }
+ }
+}
+
+} // namespace DEG
diff --git a/source/blender/depsgraph/intern/debug/deg_debug_graphviz.cc b/source/blender/depsgraph/intern/debug/deg_debug_graphviz.cc
index 70cd5f1..0d56ce7 100644
--- a/source/blender/depsgraph/intern/debug/deg_debug_graphviz.cc
+++ b/source/blender/depsgraph/intern/debug/deg_debug_graphviz.cc
@@ -321,7 +321,7 @@ static void deg_debug_graphviz_node_single(const DebugContext &ctx,
static void deg_debug_graphviz_node_cluster_begin(const DebugContext &ctx,
const DepsNode *node)
{
- string name = node->identifier().c_str();
+ string name = node->identifier();
if (node->type == DEPSNODE_TYPE_ID_REF) {
IDDepsNode *id_node = (IDDepsNode *)node;
char buf[256];
diff --git a/source/blender/depsgraph/intern/depsgraph.cc b/source/blender/depsgraph/intern/depsgraph.cc
index 2b7c637..5604044 100644
--- a/source/blender/depsgraph/intern/depsgraph.cc
+++ b/source/blender/depsgraph/intern/depsgraph.cc
@@ -32,8 +32,6 @@
#include "intern/depsgraph.h" /* own include */
-#include <string.h>
-
#include "MEM_guardedalloc.h"
#include "BLI_utildefines.h"
@@ -53,6 +51,8 @@ extern "C" {
#include "RNA_access.h"
}
+#include <cstring>
+
#include "DEG_depsgraph.h"
#include "intern/nodes/deg_node.h"
@@ -116,7 +116,7 @@ static bool pointer_to_component_node_criteria(const PointerRNA *ptr,
const PropertyRNA *prop,
ID **id,
eDepsNode_Type *type,
- string *subdata)
+ const char **subdata)
{
if (!ptr->type)
return false;
@@ -189,16 +189,23 @@ static bool pointer_to_component_node_criteria(const PointerRNA *ptr,
/* Transforms props? */
if (prop) {
const char *prop_identifier = RNA_property_identifier((PropertyRNA *)prop);
-
+ /* TODO(sergey): How to optimize this? */
if (strstr(prop_identifier, "location") ||
strstr(prop_identifier, "rotation") ||
- strstr(prop_identifier, "scale"))
+ strstr(prop_identifier, "scale") ||
+ strstr(prop_identifier, "matrix_"))
{
*type = DEPSNODE_TYPE_TRANSFORM;
return true;
}
+ else if (strstr(prop_identifier, "data")) {
+ /* We access object.data, most likely geometry.
+ * Might be a bone, though.
+ */
+ *type = DEPSNODE_TYPE_GEOMETRY;
+ return true;
+ }
}
- // ...
}
else if (ptr->type == &RNA_ShapeKey) {
Key *key = (Key *)ptr->id.data;
@@ -232,7 +239,7 @@ DepsNode *Depsgraph::find_node_from_pointer(const PointerRNA *ptr,
{
ID *id;
eDepsNode_Type type;
- string name;
+ const char *name;
/* Get querying conditions. */
if (pointer_to_id_node_criteria(ptr, prop, &id)) {
@@ -240,8 +247,9 @@ DepsNode *Depsgraph::find_node_from_pointer(const PointerRNA *ptr,
}
else if (pointer_to_component_node_criteria(ptr, prop, &id, &type, &name)) {
IDDepsNode *id_node = find_id_node(id);
- if (id_node)
+ if (id_node != NULL) {
return id_node->find_component(type, name);
+ }
}
return NULL;
@@ -328,7 +336,7 @@ IDDepsNode *Depsgraph::find_id_node(const ID *id) const
return reinterpret_cast<IDDepsNode *>(BLI_ghash_lookup(id_hash, id));
}
-IDDepsNode *Depsgraph::add_id_node(ID *id, const string &name)
+IDDepsNode *Depsgraph::add_id_node(ID *id, const char *name)
{
IDDepsNode *id_node = find_id_node(id);
if (!id_node) {
@@ -370,8 +378,7 @@ DepsRelation *Depsgraph::add_new_relation(OperationDepsNode *from,
if (comp_node->type == DEPSNODE_TYPE_GEOMETRY) {
IDDepsNode *id_to = to->owner->owner;
IDDepsNode *id_from = from->owner->owner;
- Object *object_to = (Object *)id_to->id;
- if (id_to != id_from && (object_to->recalc & OB_RECALC_ALL)) {
+ if (id_to != id_from && (id_to->id->tag & LIB_TAG_ID_RECALC_ALL)) {
if ((id_from->eval_flags & DAG_EVAL_NEED_CPU) == 0) {
id_from->tag_update(this);
id_from->eval_flags |= DAG_EVAL_NEED_CPU;
diff --git a/source/blender/depsgraph/intern/depsgraph.h b/source/blender/depsgraph/intern/depsgraph.h
index 08b264f..e668fac 100644
--- a/source/blender/depsgraph/intern/depsgraph.h
+++ b/source/blender/depsgraph/intern/depsgraph.h
@@ -101,22 +101,6 @@ struct Depsgraph {
~Depsgraph();
/**
- * Find node which matches the specified description.
- *
- * \param id: ID block that is associated with this
- * \param subdata: identifier used for sub-ID data (e.g. bone)
- * \param type: type of node we're dealing with
- * \param name: custom identifier assigned to node
- *
- * \return A node matching the required characteristics if it exists
- * or NULL if no such node exists in the graph.
- */
- DepsNode *find_node(const ID *id,
- eDepsNode_Type type,
- const string &subdata,
- const string &name);
-
- /**
* Convenience wrapper to find node given just pointer + property.
*
* \param ptr: pointer to the data that node will represent
@@ -136,7 +120,7 @@ struct Depsgraph {
void clear_subgraph_nodes();
IDDepsNode *find_id_node(const ID *id) const;
- IDDepsNode *add_id_node(ID *id, const string &name = "");
+ IDDepsNode *add_id_node(ID *id, const char *name = "");
void remove_id_node(const ID *id);
void clear_id_nodes();
diff --git a/source/blender/depsgraph/intern/depsgraph_build.cc b/source/blender/depsgraph/intern/depsgraph_build.cc
index 7a3b19e..9952f71 100644
--- a/source/blender/depsgraph/intern/depsgraph_build.cc
+++ b/source/blender/depsgraph/intern/depsgraph_build.cc
@@ -32,6 +32,8 @@
#include "MEM_guardedalloc.h"
+// #define DEBUG_TIME
+
extern "C" {
#include "DNA_cachefile_types.h"
#include "DNA_object_types.h"
@@ -41,6 +43,11 @@ extern "C" {
#include "BLI_utildefines.h"
#include "BLI_ghash.h"
+#ifdef DEBUG_TIME
+# include "PIL_time.h"
+# include "PIL_time_utildefines.h"
+#endif
+
#include "BKE_main.h"
#include "BKE_collision.h"
#include "BKE_effect.h"
@@ -190,6 +197,10 @@ void DEG_add_special_eval_flag(Depsgraph *graph, ID *id, short flag)
*/
void DEG_graph_build_from_scene(Depsgraph *graph, Main *bmain, Scene *scene)
{
+#ifdef DEBUG_TIME
+ TIMEIT_START(DEG_graph_build_from_scene);
+#endif
+
DEG::Depsgraph *deg_graph = reinterpret_cast<DEG::Depsgraph *>(graph);
/* 1) Generate all the nodes in the graph first */
@@ -239,6 +250,10 @@ void DEG_graph_build_from_scene(Depsgraph *graph, Main *bmain, Scene *scene)
abort();
}
#endif
+
+#ifdef DEBUG_TIME
+ TIMEIT_END(DEG_graph_build_from_scene);
+#endif
}
/* Tag graph relations for update. */
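The TIMEIT_START()/TIMEIT_END() calls added above are compiled out unless DEBUG_TIME is defined; enabling them only requires uncommenting the define introduced near the top of this file (a minimal sketch, not part of the patch):

    /* depsgraph_build.cc, before the PIL_time includes: */
    #define DEBUG_TIME
    /* DEG_graph_build_from_scene() then reports its wall-clock build time
     * through the TIMEIT_START/TIMEIT_END macros from PIL_time_utildefines.h. */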
@@ -309,7 +324,15 @@ void DEG_scene_graph_free(Scene *scene)
}
}
-void DEG_add_collision_relations(DepsNodeHandle *handle, Scene *scene, Object *ob, Group *group, int layer, unsigned int modifier_type, DEG_CollobjFilterFunction fn, bool dupli, const char *name)
+void DEG_add_collision_relations(DepsNodeHandle *handle,
+ Scene *scene,
+ Object *ob,
+ Group *group,
+ int layer,
+ unsigned int modifier_type,
+ DEG_CollobjFilterFunction fn,
+ bool dupli,
+ const char *name)
{
unsigned int numcollobj;
Object **collobjs = get_collisionobjects_ext(scene, ob, group, layer, &numcollobj, modifier_type, dupli);
@@ -327,7 +350,13 @@ void DEG_add_collision_relations(DepsNodeHandle *handle, Scene *scene, Object *o
MEM_freeN(collobjs);
}
-void DEG_add_forcefield_relations(DepsNodeHandle *handle, Scene *scene, Object *ob, EffectorWeights *effector_weights, bool add_absorption, int skip_forcefield, const char *name)
+void DEG_add_forcefield_relations(DepsNodeHandle *handle,
+ Scene *scene,
+ Object *ob,
+ EffectorWeights *effector_weights,
+ bool add_absorption,
+ int skip_forcefield,
+ const char *name)
{
ListBase *effectors = pdInitEffectors(scene, ob, NULL, effector_weights, false);
@@ -339,17 +368,33 @@ void DEG_add_forcefield_relations(DepsNodeHandle *handle, Scene *scene, Object *
if (eff->psys) {
DEG_add_object_relation(handle, eff->ob, DEG_OB_COMP_EVAL_PARTICLES, name);
- /* TODO: remove this when/if EVAL_PARTICLES is sufficient for up to date particles */
+ /* TODO: remove this when/if EVAL_PARTICLES is sufficient
+ * for up to date particles.
+ */
DEG_add_object_relation(handle, eff->ob, DEG_OB_COMP_GEOMETRY, name);
}
if (eff->pd->forcefield == PFIELD_SMOKEFLOW && eff->pd->f_source) {
- DEG_add_object_relation(handle, eff->pd->f_source, DEG_OB_COMP_TRANSFORM, "Smoke Force Domain");
- DEG_add_object_relation(handle, eff->pd->f_source, DEG_OB_COMP_GEOMETRY, "Smoke Force Domain");
+ DEG_add_object_relation(handle,
+ eff->pd->f_source,
+ DEG_OB_COMP_TRANSFORM,
+ "Smoke Force Domain");
+ DEG_add_object_relation(handle,
+ eff->pd->f_source,
+ DEG_OB_COMP_GEOMETRY,
+ "Smoke Force Domain");
}
if (add_absorption && (eff->pd->flag & PFIELD_VISIBILITY)) {
- DEG_add_collision_relations(handle, scene, ob, NULL, eff->ob->lay, eModifierType_Collision, NULL, true, "Force Absorption");
+ DEG_add_collision_relations(handle,
+ scene,
+ ob,
+ NULL,
+ eff->ob->lay,
+ eModifierType_Collision,
+ NULL,
+ true,
+ "Force Absorption");
}
}
}
diff --git a/source/blender/depsgraph/intern/depsgraph_intern.h b/source/blender/depsgraph/intern/depsgraph_intern.h
index e5d3d1f..2d8e7dc 100644
--- a/source/blender/depsgraph/intern/depsgraph_intern.h
+++ b/source/blender/depsgraph/intern/depsgraph_intern.h
@@ -63,8 +63,8 @@ struct DepsNodeFactory {
virtual const char *tname() const = 0;
virtual DepsNode *create_node(const ID *id,
- const string &subdata,
- const string &name) const = 0;
+ const char *subdata,
+ const char *name) const = 0;
};
template <class NodeType>
@@ -73,7 +73,7 @@ struct DepsNodeFactoryImpl : public DepsNodeFactory {
eDepsNode_Class tclass() const { return NodeType::typeinfo.tclass; }
const char *tname() const { return NodeType::typeinfo.tname; }
- DepsNode *create_node(const ID *id, const string &subdata, const string &name) const
+ DepsNode *create_node(const ID *id, const char *subdata, const char *name) const
{
DepsNode *node = OBJECT_GUARDED_NEW(NodeType);
@@ -81,12 +81,14 @@ struct DepsNodeFactoryImpl : public DepsNodeFactory {
node->type = type();
node->tclass = tclass();
- if (!name.empty())
+ if (name[0] != '\0') {
/* set name if provided ... */
node->name = name;
- else
+ }
+ else {
/* ... otherwise use default type name */
node->name = tname();
+ }
node->init(id, subdata);
diff --git a/source/blender/depsgraph/intern/depsgraph_tag.cc b/source/blender/depsgraph/intern/depsgraph_tag.cc
index b7b62bd..e8ed036 100644
--- a/source/blender/depsgraph/intern/depsgraph_tag.cc
+++ b/source/blender/depsgraph/intern/depsgraph_tag.cc
@@ -31,7 +31,7 @@
*/
#include <stdio.h>
-#include <cstring>
+#include <cstring> /* required for memset */
#include <queue>
extern "C" {
@@ -235,6 +235,9 @@ void DEG_id_tag_update_ex(Main *bmain, ID *id, short flag)
if (flag & (OB_RECALC_OB | OB_RECALC_DATA)) {
DEG_graph_id_tag_update(bmain, graph, id);
}
+ else if (flag & OB_RECALC_TIME) {
+ DEG_graph_id_tag_update(bmain, graph, id);
+ }
}
}
diff --git a/source/blender/depsgraph/intern/eval/deg_eval.cc b/source/blender/depsgraph/intern/eval/deg_eval.cc
index c3fd202..065f656 100644
--- a/source/blender/depsgraph/intern/eval/deg_eval.cc
+++ b/source/blender/depsgraph/intern/eval/deg_eval.cc
@@ -152,7 +152,7 @@ static void deg_task_run_func(TaskPool *pool,
}
if ((rel->flag & DEPSREL_FLAG_CYCLIC) == 0) {
BLI_assert(child->num_links_pending > 0);
- atomic_sub_uint32(&child->num_links_pending, 1);
+ atomic_sub_and_fetch_uint32(&child->num_links_pending, 1);
}
if (child->num_links_pending == 0) {
bool is_scheduled = atomic_fetch_and_or_uint8(
@@ -287,7 +287,7 @@ static void schedule_node(TaskPool *pool, Depsgraph *graph, unsigned int layers,
{
if (dec_parents) {
BLI_assert(node->num_links_pending > 0);
- atomic_sub_uint32(&node->num_links_pending, 1);
+ atomic_sub_and_fetch_uint32(&node->num_links_pending, 1);
}
if (node->num_links_pending == 0) {
@@ -304,7 +304,7 @@ static void schedule_node(TaskPool *pool, Depsgraph *graph, unsigned int layers,
deg_task_run_func,
node,
false,
- TASK_PRIORITY_LOW,
+ TASK_PRIORITY_HIGH,
thread_id);
}
}
diff --git a/source/blender/depsgraph/intern/eval/deg_eval_debug.cc b/source/blender/depsgraph/intern/eval/deg_eval_debug.cc
index 67d64aa..060544a 100644
--- a/source/blender/depsgraph/intern/eval/deg_eval_debug.cc
+++ b/source/blender/depsgraph/intern/eval/deg_eval_debug.cc
@@ -30,10 +30,10 @@
* Implementation of tools for debugging the depsgraph
*/
-#include <cstring>
-
#include "intern/eval/deg_eval_debug.h"
+#include <cstring> /* required for STREQ later on. */
+
extern "C" {
#include "BLI_listbase.h"
#include "BLI_ghash.h"
@@ -53,10 +53,10 @@ namespace DEG {
DepsgraphStats *DepsgraphDebug::stats = NULL;
-static string get_component_name(eDepsNode_Type type, const string &name = "")
+static string get_component_name(eDepsNode_Type type, const char *name = "")
{
DepsNodeFactory *factory = deg_get_node_factory(type);
- if (name.empty()) {
+ if (name[0] != '\0') {
return string(factory->tname());
}
else {
@@ -116,7 +116,7 @@ void DepsgraphDebug::task_started(Depsgraph *graph,
*/
DepsgraphStatsComponent *comp_stats =
get_component_stats(id, get_component_name(comp->type,
- comp->name),
+ comp->name).c_str(),
true);
times_clear(comp_stats->times);
}
@@ -146,7 +146,7 @@ void DepsgraphDebug::task_completed(Depsgraph *graph,
DepsgraphStatsComponent *comp_stats =
get_component_stats(id,
get_component_name(comp->type,
- comp->name),
+ comp->name).c_str(),
true);
times_add(comp_stats->times, time);
}
@@ -226,7 +226,7 @@ DepsgraphStatsID *DepsgraphDebug::get_id_stats(ID *id, bool create)
DepsgraphStatsComponent *DepsgraphDebug::get_component_stats(
DepsgraphStatsID *id_stats,
- const string &name,
+ const char *name,
bool create)
{
DepsgraphStatsComponent *comp_stats;
@@ -234,13 +234,14 @@ DepsgraphStatsComponent *DepsgraphDebug::get_component_stats(
comp_stats != NULL;
comp_stats = comp_stats->next)
{
- if (STREQ(comp_stats->name, name.c_str()))
+ if (STREQ(comp_stats->name, name)) {
break;
+ }
}
if (!comp_stats && create) {
comp_stats = (DepsgraphStatsComponent *)MEM_callocN(sizeof(DepsgraphStatsComponent),
"Depsgraph Component Stats");
- BLI_strncpy(comp_stats->name, name.c_str(), sizeof(comp_stats->name));
+ BLI_strncpy(comp_stats->name, name, sizeof(comp_stats->name));
BLI_addtail(&id_stats->components, comp_stats);
}
return comp_stats;
diff --git a/source/blender/depsgraph/intern/eval/deg_eval_debug.h b/source/blender/depsgraph/intern/eval/deg_eval_debug.h
index 9109019..0bbe88c 100644
--- a/source/blender/depsgraph/intern/eval/deg_eval_debug.h
+++ b/source/blender/depsgraph/intern/eval/deg_eval_debug.h
@@ -66,10 +66,10 @@ struct DepsgraphDebug {
static DepsgraphStatsID *get_id_stats(ID *id, bool create);
static DepsgraphStatsComponent *get_component_stats(DepsgraphStatsID *id_stats,
- const string &name,
+ const char *name,
bool create);
static DepsgraphStatsComponent *get_component_stats(ID *id,
- const string &name,
+ const char *name,
bool create)
{
return get_component_stats(get_id_stats(id, create), name, create);
diff --git a/source/blender/depsgraph/intern/nodes/deg_node.cc b/source/blender/depsgraph/intern/nodes/deg_node.cc
index eb408f2..57b25c1 100644
--- a/source/blender/depsgraph/intern/nodes/deg_node.cc
+++ b/source/blender/depsgraph/intern/nodes/deg_node.cc
@@ -31,7 +31,7 @@
#include "intern/nodes/deg_node.h"
#include <stdio.h>
-#include <string.h>
+#include <cstring> /* required for STREQ later on. */
#include "BLI_utildefines.h"
#include "BLI_ghash.h"
@@ -72,7 +72,7 @@ DepsNode::TypeInfo::TypeInfo(eDepsNode_Type type, const char *tname)
DepsNode::DepsNode()
{
- name[0] = '\0';
+ name = "";
}
DepsNode::~DepsNode()
@@ -122,7 +122,7 @@ RootDepsNode::~RootDepsNode()
OBJECT_GUARDED_DELETE(time_source, TimeSourceDepsNode);
}
-TimeSourceDepsNode *RootDepsNode::add_time_source(const string &name)
+TimeSourceDepsNode *RootDepsNode::add_time_source(const char *name)
{
if (!time_source) {
DepsNodeFactory *factory = deg_get_node_factory(DEPSNODE_TYPE_TIMESOURCE);
@@ -142,12 +142,24 @@ static DepsNodeFactoryImpl<TimeSourceDepsNode> DNTI_TIMESOURCE;
/* ID Node ================================================ */
+IDDepsNode::ComponentIDKey::ComponentIDKey(eDepsNode_Type type,
+ const char *name)
+ : type(type), name(name)
+{
+}
+
+bool IDDepsNode::ComponentIDKey::operator== (const ComponentIDKey &other) const
+{
+ return type == other.type &&
+ STREQ(name, other.name);
+}
+
static unsigned int id_deps_node_hash_key(const void *key_v)
{
const IDDepsNode::ComponentIDKey *key =
reinterpret_cast<const IDDepsNode::ComponentIDKey *>(key_v);
return hash_combine(BLI_ghashutil_uinthash(key->type),
- BLI_ghashutil_strhash_p(key->name.c_str()));
+ BLI_ghashutil_strhash_p(key->name));
}
static bool id_deps_node_hash_key_cmp(const void *a, const void *b)
@@ -173,7 +185,7 @@ static void id_deps_node_hash_value_free(void *value_v)
}
/* Initialize 'id' node - from pointer data given. */
-void IDDepsNode::init(const ID *id, const string &UNUSED(subdata))
+void IDDepsNode::init(const ID *id, const char *UNUSED(subdata))
{
/* Store ID-pointer. */
BLI_assert(id != NULL);
@@ -204,14 +216,14 @@ IDDepsNode::~IDDepsNode()
}
ComponentDepsNode *IDDepsNode::find_component(eDepsNode_Type type,
- const string &name) const
+ const char *name) const
{
ComponentIDKey key(type, name);
return reinterpret_cast<ComponentDepsNode *>(BLI_ghash_lookup(components, &key));
}
ComponentDepsNode *IDDepsNode::add_component(eDepsNode_Type type,
- const string &name)
+ const char *name)
{
ComponentDepsNode *comp_node = find_component(type, name);
if (!comp_node) {
@@ -226,7 +238,7 @@ ComponentDepsNode *IDDepsNode::add_component(eDepsNode_Type type,
return comp_node;
}
-void IDDepsNode::remove_component(eDepsNode_Type type, const string &name)
+void IDDepsNode::remove_component(eDepsNode_Type type, const char *name)
{
ComponentDepsNode *comp_node = find_component(type, name);
if (comp_node) {
@@ -281,7 +293,7 @@ static DepsNodeFactoryImpl<IDDepsNode> DNTI_ID_REF;
/* Subgraph Node ========================================== */
/* Initialize 'subgraph' node - from pointer data given. */
-void SubgraphDepsNode::init(const ID *id, const string &UNUSED(subdata))
+void SubgraphDepsNode::init(const ID *id, const char *UNUSED(subdata))
{
/* Store ID-ref if provided. */
this->root_id = (ID *)id;
diff --git a/source/blender/depsgraph/intern/nodes/deg_node.h b/source/blender/depsgraph/intern/nodes/deg_node.h
index b2262c4..7c2f538 100644
--- a/source/blender/depsgraph/intern/nodes/deg_node.h
+++ b/source/blender/depsgraph/intern/nodes/deg_node.h
@@ -32,6 +32,8 @@
#include "intern/depsgraph_types.h"
+#include "BLI_utildefines.h"
+
struct ID;
struct GHash;
struct Scene;
@@ -57,7 +59,7 @@ struct DepsNode {
};
/* Identifier - mainly for debugging purposes. */
- string name;
+ const char *name;
/* Structural type of node. */
eDepsNode_Type type;
@@ -78,8 +80,9 @@ struct DepsNode {
/* Nodes which depend on this one. */
Relations outlinks;
- /* Generic tag for traversal algorithms */
+ /* Generic tags for traversal algorithms. */
int done;
+ int tag;
/* Methods. */
@@ -90,7 +93,7 @@ struct DepsNode {
string full_identifier() const;
virtual void init(const ID * /*id*/,
- const string &/*subdata*/) {}
+ const char * /*subdata*/) {}
virtual void tag_update(Depsgraph * /*graph*/) {}
@@ -129,7 +132,7 @@ struct RootDepsNode : public DepsNode {
RootDepsNode();
~RootDepsNode();
- TimeSourceDepsNode *add_time_source(const string &name = "");
+ TimeSourceDepsNode *add_time_source(const char *name = "");
/* scene that this corresponds to */
Scene *scene;
@@ -143,26 +146,21 @@ struct RootDepsNode : public DepsNode {
/* ID-Block Reference */
struct IDDepsNode : public DepsNode {
struct ComponentIDKey {
- ComponentIDKey(eDepsNode_Type type, const string &name = "")
- : type(type), name(name) {}
-
- bool operator== (const ComponentIDKey &other) const
- {
- return type == other.type && name == other.name;
- }
+ ComponentIDKey(eDepsNode_Type type, const char *name = "");
+ bool operator==(const ComponentIDKey &other) const;
eDepsNode_Type type;
- string name;
+ const char *name;
};
- void init(const ID *id, const string &subdata);
+ void init(const ID *id, const char *subdata);
~IDDepsNode();
ComponentDepsNode *find_component(eDepsNode_Type type,
- const string &name = "") const;
+ const char *name = "") const;
ComponentDepsNode *add_component(eDepsNode_Type type,
- const string &name = "");
- void remove_component(eDepsNode_Type type, const string &name = "");
+ const char *name = "");
+ void remove_component(eDepsNode_Type type, const char *name = "");
void clear_components();
void tag_update(Depsgraph *graph);
@@ -189,7 +187,7 @@ struct IDDepsNode : public DepsNode {
/* Subgraph Reference. */
struct SubgraphDepsNode : public DepsNode {
- void init(const ID *id, const string &subdata);
+ void init(const ID *id, const char *subdata);
~SubgraphDepsNode();
/* Instanced graph. */
diff --git a/source/blender/depsgraph/intern/nodes/deg_node_component.cc b/source/blender/depsgraph/intern/nodes/deg_node_component.cc
index 01f33b6..06f91ac 100644
--- a/source/blender/depsgraph/intern/nodes/deg_node_component.cc
+++ b/source/blender/depsgraph/intern/nodes/deg_node_component.cc
@@ -31,7 +31,7 @@
#include "intern/nodes/deg_node_component.h"
#include <stdio.h>
-#include <string.h>
+#include <cstring> /* required for STREQ later on. */
extern "C" {
#include "BLI_utildefines.h"
@@ -53,12 +53,50 @@ namespace DEG {
/* Standard Component Methods ============================= */
+ComponentDepsNode::OperationIDKey::OperationIDKey()
+ : opcode(DEG_OPCODE_OPERATION),
+ name(""),
+ name_tag(-1)
+{
+}
+
+ComponentDepsNode::OperationIDKey::OperationIDKey(eDepsOperation_Code opcode)
+ : opcode(opcode),
+ name(""),
+ name_tag(-1)
+{
+}
+
+ComponentDepsNode::OperationIDKey::OperationIDKey(eDepsOperation_Code opcode,
+ const char *name,
+ int name_tag)
+ : opcode(opcode),
+ name(name),
+ name_tag(name_tag)
+{
+}
+
+string ComponentDepsNode::OperationIDKey::identifier() const
+{
+ char codebuf[5];
+ BLI_snprintf(codebuf, sizeof(codebuf), "%d", opcode);
+ return string("OperationIDKey(") + codebuf + ", " + name + ")";
+}
+
+bool ComponentDepsNode::OperationIDKey::operator==(
+ const OperationIDKey &other) const
+{
+ return (opcode == other.opcode) &&
+ (STREQ(name, other.name)) &&
+ (name_tag == other.name_tag);
+}
+
static unsigned int comp_node_hash_key(const void *key_v)
{
const ComponentDepsNode::OperationIDKey *key =
reinterpret_cast<const ComponentDepsNode::OperationIDKey *>(key_v);
return hash_combine(BLI_ghashutil_uinthash(key->opcode),
- BLI_ghashutil_strhash_p(key->name.c_str()));
+ BLI_ghashutil_strhash_p(key->name));
}
static bool comp_node_hash_key_cmp(const void *a, const void *b)
@@ -95,7 +133,7 @@ ComponentDepsNode::ComponentDepsNode() :
/* Initialize 'component' node - from pointer data given */
void ComponentDepsNode::init(const ID * /*id*/,
- const string & /*subdata*/)
+ const char * /*subdata*/)
{
/* hook up eval context? */
// XXX: maybe this needs a special API?
@@ -114,7 +152,7 @@ ComponentDepsNode::~ComponentDepsNode()
string ComponentDepsNode::identifier() const
{
- string &idname = this->owner->name;
+ string idname = this->owner->name;
char typebuf[16];
sprintf(typebuf, "(%d)", type);
@@ -139,9 +177,11 @@ OperationDepsNode *ComponentDepsNode::find_operation(OperationIDKey key) const
}
}
-OperationDepsNode *ComponentDepsNode::find_operation(eDepsOperation_Code opcode, const string &name) const
+OperationDepsNode *ComponentDepsNode::find_operation(eDepsOperation_Code opcode,
+ const char *name,
+ int name_tag) const
{
- OperationIDKey key(opcode, name);
+ OperationIDKey key(opcode, name, name_tag);
return find_operation(key);
}
@@ -151,21 +191,26 @@ OperationDepsNode *ComponentDepsNode::has_operation(OperationIDKey key) const
}
OperationDepsNode *ComponentDepsNode::has_operation(eDepsOperation_Code opcode,
- const string &name) const
+ const char *name,
+ int name_tag) const
{
- OperationIDKey key(opcode, name);
+ OperationIDKey key(opcode, name, name_tag);
return has_operation(key);
}
-OperationDepsNode *ComponentDepsNode::add_operation(eDepsOperation_Type optype, DepsEvalOperationCb op, eDepsOperation_Code opcode, const string &name)
+OperationDepsNode *ComponentDepsNode::add_operation(eDepsOperation_Type optype,
+ DepsEvalOperationCb op,
+ eDepsOperation_Code opcode,
+ const char *name,
+ int name_tag)
{
- OperationDepsNode *op_node = has_operation(opcode, name);
+ OperationDepsNode *op_node = has_operation(opcode, name, name_tag);
if (!op_node) {
DepsNodeFactory *factory = deg_get_node_factory(DEPSNODE_TYPE_OPERATION);
op_node = (OperationDepsNode *)factory->create_node(this->owner->id, "", name);
/* register opnode in this component's operation set */
- OperationIDKey *key = OBJECT_GUARDED_NEW(OperationIDKey, opcode, name);
+ OperationIDKey *key = OBJECT_GUARDED_NEW(OperationIDKey, opcode, name, name_tag);
BLI_ghash_insert(operations_map, key, op_node);
/* set as entry/exit node of component (if appropriate) */
@@ -197,16 +242,6 @@ OperationDepsNode *ComponentDepsNode::add_operation(eDepsOperation_Type optype,
return op_node;
}
-void ComponentDepsNode::remove_operation(eDepsOperation_Code opcode, const string &name)
-{
- /* unregister */
- OperationIDKey key(opcode, name);
- BLI_ghash_remove(operations_map,
- &key,
- comp_node_hash_key_free,
- comp_node_hash_key_free);
-}
-
void ComponentDepsNode::clear_operations()
{
if (operations_map != NULL) {
@@ -337,7 +372,7 @@ static DepsNodeFactoryImpl<PoseComponentDepsNode> DNTI_EVAL_POSE;
/* Bone Component ========================================= */
/* Initialize 'bone component' node - from pointer data given */
-void BoneComponentDepsNode::init(const ID *id, const string &subdata)
+void BoneComponentDepsNode::init(const ID *id, const char *subdata)
{
/* generic component-node... */
ComponentDepsNode::init(id, subdata);
@@ -350,7 +385,7 @@ void BoneComponentDepsNode::init(const ID *id, const string &subdata)
/* bone-specific node data */
Object *ob = (Object *)id;
- this->pchan = BKE_pose_channel_find_name(ob->pose, subdata.c_str());
+ this->pchan = BKE_pose_channel_find_name(ob->pose, subdata);
}
DEG_DEPSNODE_DEFINE(BoneComponentDepsNode, DEPSNODE_TYPE_BONE, "Bone Component");
diff --git a/source/blender/depsgraph/intern/nodes/deg_node_component.h b/source/blender/depsgraph/intern/nodes/deg_node_component.h
index 7dec8ea..969771a 100644
--- a/source/blender/depsgraph/intern/nodes/deg_node_component.h
+++ b/source/blender/depsgraph/intern/nodes/deg_node_component.h
@@ -53,50 +53,38 @@ struct ComponentDepsNode : public DepsNode {
struct OperationIDKey
{
eDepsOperation_Code opcode;
- string name;
-
-
- OperationIDKey() :
- opcode(DEG_OPCODE_OPERATION), name("")
- {}
- OperationIDKey(eDepsOperation_Code opcode) :
- opcode(opcode), name("")
- {}
- OperationIDKey(eDepsOperation_Code opcode, const string &name) :
- opcode(opcode), name(name)
- {}
-
- string identifier() const
- {
- char codebuf[5];
- BLI_snprintf(codebuf, sizeof(codebuf), "%d", opcode);
-
- return string("OperationIDKey(") + codebuf + ", " + name + ")";
- }
-
- bool operator==(const OperationIDKey &other) const
- {
- return (opcode == other.opcode) && (name == other.name);
- }
+ const char *name;
+ int name_tag;
+
+ OperationIDKey();
+ OperationIDKey(eDepsOperation_Code opcode);
+ OperationIDKey(eDepsOperation_Code opcode,
+ const char *name,
+ int name_tag);
+
+ string identifier() const;
+ bool operator==(const OperationIDKey &other) const;
};
/* Typedef for container of operations */
ComponentDepsNode();
~ComponentDepsNode();
- void init(const ID *id, const string &subdata);
+ void init(const ID *id, const char *subdata);
string identifier() const;
/* Find an existing operation, will throw an assert() if it does not exist. */
OperationDepsNode *find_operation(OperationIDKey key) const;
OperationDepsNode *find_operation(eDepsOperation_Code opcode,
- const string &name) const;
+ const char *name,
+ int name_tag) const;
/* Check operation exists and return it. */
OperationDepsNode *has_operation(OperationIDKey key) const;
OperationDepsNode *has_operation(eDepsOperation_Code opcode,
- const string &name) const;
+ const char *name,
+ int name_tag) const;
/**
* Create a new node for representing an operation and add this to graph
@@ -114,9 +102,9 @@ struct ComponentDepsNode : public DepsNode {
OperationDepsNode *add_operation(eDepsOperation_Type optype,
DepsEvalOperationCb op,
eDepsOperation_Code opcode,
- const string &name);
+ const char *name,
+ int name_tag);
- void remove_operation(eDepsOperation_Code opcode, const string &name);
void clear_operations();
void tag_update(Depsgraph *graph);
@@ -194,7 +182,7 @@ struct PoseComponentDepsNode : public ComponentDepsNode {
/* Bone Component */
struct BoneComponentDepsNode : public ComponentDepsNode {
- void init(const ID *id, const string &subdata);
+ void init(const ID *id, const char *subdata);
struct bPoseChannel *pchan; /* the bone that this component represents */
diff --git a/source/blender/depsgraph/intern/nodes/deg_node_operation.cc b/source/blender/depsgraph/intern/nodes/deg_node_operation.cc
index 5847af2..9eed4df 100644
--- a/source/blender/depsgraph/intern/nodes/deg_node_operation.cc
+++ b/source/blender/depsgraph/intern/nodes/deg_node_operation.cc
@@ -68,7 +68,7 @@ string OperationDepsNode::full_identifier() const
{
string owner_str = "";
if (owner->type == DEPSNODE_TYPE_BONE) {
- owner_str = owner->owner->name + "." + owner->name;
+ owner_str = string(owner->owner->name) + "." + owner->name;
}
else {
owner_str = owner->owner->name;
diff --git a/source/blender/depsgraph/util/deg_util_foreach.h b/source/blender/depsgraph/util/deg_util_foreach.h
index 14cf4fc..87d3716 100644
--- a/source/blender/depsgraph/util/deg_util_foreach.h
+++ b/source/blender/depsgraph/util/deg_util_foreach.h
@@ -66,3 +66,8 @@
#define GSET_FOREACH_END() \
} \
} while(0)
+
+#define LINKLIST_FOREACH(type, var, list) \
+ for (type var = (type)((list)->first); \
+ var != NULL; \
+ var = (type)(((Link*)(var))->next))
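The new LINKLIST_FOREACH() macro iterates any DNA ListBase whose elements begin with the Link (next/prev) header, which is how the relation builders above walk scene bases and pose channels; a minimal usage sketch (illustrative only, variable names assumed):

    ListBase *bases = &scene->base;          /* any Link-compatible ListBase */
    LINKLIST_FOREACH (Base *, base, bases) {
        Object *ob = base->object;           /* items are visited in list order */
        /* ... per-object work ... */
    }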
diff --git a/source/blender/editors/space_file/filelist.c b/source/blender/editors/space_file/filelist.c
index ab95a77..f8ba619 100644
--- a/source/blender/editors/space_file/filelist.c
+++ b/source/blender/editors/space_file/filelist.c
@@ -2509,7 +2509,7 @@ static void filelist_readjob_do(
* Using an atomic operation to avoid having to lock thread...
* Note that we do not really need this here currently, since there is a single listing thread, but better
* remain consistent about threading! */
- *((uint32_t *)entry->uuid) = atomic_add_uint32((uint32_t *)filelist->filelist_intern.curr_uuid, 1);
+ *((uint32_t *)entry->uuid) = atomic_add_and_fetch_uint32((uint32_t *)filelist->filelist_intern.curr_uuid, 1);
/* Only thing we change in direntry here, so we need to free it first. */
MEM_freeN(entry->relpath);
diff --git a/source/blender/editors/space_outliner/outliner_tree.c b/source/blender/editors/space_outliner/outliner_tree.c
index 96bab3d..17aefac 100644
--- a/source/blender/editors/space_outliner/outliner_tree.c
+++ b/source/blender/editors/space_outliner/outliner_tree.c
@@ -1108,8 +1108,12 @@ static TreeElement *outliner_add_element(SpaceOops *soops, ListBase *lb, void *i
tselem->flag &= ~TSE_CLOSED;
if (TSELEM_OPEN(tselem, soops)) {
- for (a = 0; a < tot; a++)
- outliner_add_element(soops, &te->subtree, (void *)ptr, te, TSE_RNA_PROPERTY, a);
+ for (a = 0; a < tot; a++) {
+ RNA_property_collection_lookup_int(ptr, iterprop, a, &propptr);
+ if (!(RNA_property_flag(propptr.data) & PROP_HIDDEN)) {
+ outliner_add_element(soops, &te->subtree, (void *)ptr, te, TSE_RNA_PROPERTY, a);
+ }
+ }
}
else if (tot)
te->flag |= TE_LAZY_CLOSED;
diff --git a/source/blender/gpu/shaders/gpu_shader_material.glsl b/source/blender/gpu/shaders/gpu_shader_material.glsl
index 67da820..549c979 100644
--- a/source/blender/gpu/shaders/gpu_shader_material.glsl
+++ b/source/blender/gpu/shaders/gpu_shader_material.glsl
@@ -1358,7 +1358,7 @@ void mtex_cube_map_refl_from_refldir(
samplerCube ima, vec3 reflecteddirection, out float value, out vec4 color)
{
color = textureCube(ima, reflecteddirection);
- value = 1.0;
+ value = color.a;
}
void mtex_cube_map_refl(
diff --git a/source/blender/imbuf/intern/oiio/CMakeLists.txt b/source/blender/imbuf/intern/oiio/CMakeLists.txt
index c873fa3..a4fb9c5 100644
--- a/source/blender/imbuf/intern/oiio/CMakeLists.txt
+++ b/source/blender/imbuf/intern/oiio/CMakeLists.txt
@@ -49,6 +49,11 @@ if(WITH_OPENIMAGEIO)
${OPENIMAGEIO_INCLUDE_DIRS}
${BOOST_INCLUDE_DIR}
)
+ if(WITH_IMAGE_OPENEXR)
+ list(APPEND INC_SYS
+ ${OPENEXR_INCLUDE_DIRS}
+ )
+ endif()
add_definitions(-DWITH_OPENIMAGEIO)
endif()
diff --git a/source/blender/makesdna/DNA_userdef_types.h b/source/blender/makesdna/DNA_userdef_types.h
index 02a0b41..0f71dff 100644
--- a/source/blender/makesdna/DNA_userdef_types.h
+++ b/source/blender/makesdna/DNA_userdef_types.h
@@ -867,14 +867,6 @@ typedef enum eNdof_Flag {
#define NDOF_PIXELS_PER_SECOND 600.0f
-/* compute_device_type */
-typedef enum eCompute_Device_Type {
- USER_COMPUTE_DEVICE_NONE = 0,
- USER_COMPUTE_DEVICE_OPENCL = 1,
- USER_COMPUTE_DEVICE_CUDA = 2,
-} eCompute_Device_Type;
-
-
typedef enum eMultiSample_Type {
USER_MULTISAMPLE_NONE = 0,
USER_MULTISAMPLE_2 = 2,
diff --git a/source/blender/makesrna/intern/rna_main.c b/source/blender/makesrna/intern/rna_main.c
index bf01619..33246c0 100644
--- a/source/blender/makesrna/intern/rna_main.c
+++ b/source/blender/makesrna/intern/rna_main.c
@@ -399,7 +399,7 @@ void RNA_def_main(BlenderRNA *brna)
RNA_def_property_ui_text(prop, "Use Autopack", "Automatically pack all external data into .blend file");
prop = RNA_def_int_vector(srna, "version", 3, NULL, 0, INT_MAX,
- "Version", "Version of the blender the .blend was saved with", 0, INT_MAX);
+ "Version", "Version of Blender the .blend was saved with", 0, INT_MAX);
RNA_def_property_int_funcs(prop, "rna_Main_version_get", NULL, NULL);
RNA_def_property_clear_flag(prop, PROP_EDITABLE);
RNA_def_property_flag(prop, PROP_THICK_WRAP);
diff --git a/source/blender/makesrna/intern/rna_mesh_api.c b/source/blender/makesrna/intern/rna_mesh_api.c
index a3bc21b..97a618b 100644
--- a/source/blender/makesrna/intern/rna_mesh_api.c
+++ b/source/blender/makesrna/intern/rna_mesh_api.c
@@ -240,6 +240,9 @@ void RNA_api_mesh(StructRNA *srna)
func = RNA_def_function(srna, "free_normals_split", "rna_Mesh_free_normals_split");
RNA_def_function_ui_description(func, "Free split vertex normals");
+ func = RNA_def_function(srna, "split_faces", "BKE_mesh_split_faces");
+ RNA_def_function_ui_description(func, "Split faces based on the edge angle");
+
func = RNA_def_function(srna, "calc_tangents", "rna_Mesh_calc_tangents");
RNA_def_function_flag(func, FUNC_USE_REPORTS);
RNA_def_function_ui_description(func,
diff --git a/source/blender/makesrna/intern/rna_render.c b/source/blender/makesrna/intern/rna_render.c
index 8438270..12f20f5 100644
--- a/source/blender/makesrna/intern/rna_render.c
+++ b/source/blender/makesrna/intern/rna_render.c
@@ -78,9 +78,14 @@ EnumPropertyItem rna_enum_render_pass_type_items[] = {
};
EnumPropertyItem rna_enum_render_pass_debug_type_items[] = {
- {RENDER_PASS_DEBUG_BVH_TRAVERSAL_STEPS, "BVH_TRAVERSAL_STEPS", 0, "BVH Traversal Steps", ""},
- {RENDER_PASS_DEBUG_BVH_TRAVERSED_INSTANCES, "BVH_TRAVERSED_INSTANCES", 0, "BVH Traversed Instances", ""},
- {RENDER_PASS_DEBUG_RAY_BOUNCES, "RAY_BOUNCES", 0, "Ray Steps", ""},
+ {RENDER_PASS_DEBUG_BVH_TRAVERSED_NODES, "BVH_TRAVERSED_NODES", 0, "BVH Traversed Nodes",
+ "Number of nodes traversed in BVH for the camera rays"},
+ {RENDER_PASS_DEBUG_BVH_TRAVERSED_INSTANCES, "BVH_TRAVERSED_INSTANCES", 0, "BVH Traversed Instances",
+ "Number of BVH instances traversed by camera rays"},
+ {RENDER_PASS_DEBUG_BVH_INTERSECTIONS, "BVH_INTERSECTIONS", 0, "BVH Intersections",
+ "Number of primitive intersections performed by the camera rays"},
+ {RENDER_PASS_DEBUG_RAY_BOUNCES, "RAY_BOUNCES", 0, "Ray Steps",
+ "Number of bounces done by the main integration loop"},
{0, NULL, 0, NULL, NULL}
};
diff --git a/source/blender/makesrna/intern/rna_userdef.c b/source/blender/makesrna/intern/rna_userdef.c
index 73a5ae5..b16e886 100644
--- a/source/blender/makesrna/intern/rna_userdef.c
+++ b/source/blender/makesrna/intern/rna_userdef.c
@@ -52,15 +52,6 @@
#include "BLT_lang.h"
#include "GPU_buffers.h"
-#ifdef WITH_CYCLES
-static EnumPropertyItem compute_device_type_items[] = {
- {USER_COMPUTE_DEVICE_NONE, "NONE", 0, "None", "Don't use compute device"},
- {USER_COMPUTE_DEVICE_CUDA, "CUDA", 0, "CUDA", "Use CUDA for GPU acceleration"},
- {USER_COMPUTE_DEVICE_OPENCL, "OPENCL", 0, "OpenCL", "Use OpenCL for GPU acceleration"},
- { 0, NULL, 0, NULL, NULL}
-};
-#endif
-
#ifdef WITH_OPENSUBDIV
static EnumPropertyItem opensubdiv_compute_type_items[] = {
{USER_OPENSUBDIV_COMPUTE_NONE, "NONE", 0, "None", ""},
@@ -124,8 +115,6 @@ static EnumPropertyItem rna_enum_language_default_items[] = {
#include "UI_interface.h"
-#include "CCL_api.h"
-
#ifdef WITH_OPENSUBDIV
# include "opensubdiv_capi.h"
#endif
@@ -135,6 +124,14 @@ static EnumPropertyItem rna_enum_language_default_items[] = {
#endif
+static void rna_userdef_version_get(PointerRNA *ptr, int *value)
+{
+ UserDef *userdef = (UserDef *)ptr->data;
+ value[0] = userdef->versionfile / 100;
+ value[1] = userdef->versionfile % 100;
+ value[2] = userdef->subversionfile;
+}
+
static void rna_userdef_update(Main *UNUSED(bmain), Scene *UNUSED(scene), PointerRNA *UNUSED(ptr))
{
WM_main_add_notifier(NC_WINDOW, NULL);
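The rna_userdef_version_get() callback added above decodes UserDef.versionfile, which packs the major and minor version into one integer, plus subversionfile into the three-component "version" vector registered further down in this file; a quick worked example (numbers assumed for illustration):

    /* versionfile = 278, subversionfile = 0  ->  version = (2, 78, 0) */
    value[0] = 278 / 100;   /* 2  */
    value[1] = 278 % 100;   /* 78 */
    value[2] = 0;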
@@ -476,78 +473,6 @@ static PointerRNA rna_Theme_space_list_generic_get(PointerRNA *ptr)
}
-#ifdef WITH_CYCLES
-static EnumPropertyItem *rna_userdef_compute_device_type_itemf(bContext *UNUSED(C), PointerRNA *UNUSED(ptr),
- PropertyRNA *UNUSED(prop), bool *r_free)
-{
- EnumPropertyItem *item = NULL;
- int totitem = 0;
-
- /* add supported device types */
- RNA_enum_items_add_value(&item, &totitem, compute_device_type_items, USER_COMPUTE_DEVICE_NONE);
- if (CCL_compute_device_list(0))
- RNA_enum_items_add_value(&item, &totitem, compute_device_type_items, USER_COMPUTE_DEVICE_CUDA);
- if (CCL_compute_device_list(1))
- RNA_enum_items_add_value(&item, &totitem, compute_device_type_items, USER_COMPUTE_DEVICE_OPENCL);
-
- RNA_enum_item_end(&item, &totitem);
- *r_free = true;
-
- return item;
-}
-
-static int rna_userdef_compute_device_get(PointerRNA *UNUSED(ptr))
-{
- if (U.compute_device_type == USER_COMPUTE_DEVICE_NONE)
- return 0;
-
- return U.compute_device_id;
-}
-
-static EnumPropertyItem *rna_userdef_compute_device_itemf(bContext *UNUSED(C), PointerRNA *UNUSED(ptr),
- PropertyRNA *UNUSED(prop), bool *r_free)
-{
- EnumPropertyItem tmp = {0, "", 0, "", ""};
- EnumPropertyItem *item = NULL;
- int totitem = 0;
-
- if (U.compute_device_type == USER_COMPUTE_DEVICE_NONE) {
- /* only add a single CPU device */
- tmp.value = 0;
- tmp.name = "CPU";
- tmp.identifier = "CPU";
- RNA_enum_item_add(&item, &totitem, &tmp);
- }
- else {
- /* get device list from cycles. it would be good to make this generic
- * once we have more subsystems using opencl, for now this is easiest */
- int opencl = (U.compute_device_type == USER_COMPUTE_DEVICE_OPENCL);
- CCLDeviceInfo *devices = CCL_compute_device_list(opencl);
- int a;
-
- if (devices) {
- for (a = 0; devices[a].identifier[0]; a++) {
- tmp.value = devices[a].value;
- tmp.identifier = devices[a].identifier;
- tmp.name = devices[a].name;
- RNA_enum_item_add(&item, &totitem, &tmp);
- }
- }
- else {
- tmp.value = 0;
- tmp.name = "CPU";
- tmp.identifier = "CPU";
- RNA_enum_item_add(&item, &totitem, &tmp);
- }
- }
-
- RNA_enum_item_end(&item, &totitem);
- *r_free = true;
-
- return item;
-}
-#endif
-
#ifdef WITH_OPENSUBDIV
static EnumPropertyItem *rna_userdef_opensubdiv_compute_type_itemf(bContext *UNUSED(C), PointerRNA *UNUSED(ptr),
PropertyRNA *UNUSED(prop), bool *r_free)
@@ -3967,13 +3892,6 @@ static void rna_def_userdef_system(BlenderRNA *brna)
{0, NULL, 0, NULL, NULL}
};
-#ifdef WITH_CYCLES
- static EnumPropertyItem compute_device_items[] = {
- {0, "CPU", 0, "CPU", ""},
- { 0, NULL, 0, NULL, NULL}
- };
-#endif
-
static EnumPropertyItem image_draw_methods[] = {
{IMAGE_DRAW_METHOD_2DTEXTURE, "2DTEXTURE", 0, "2D Texture", "Use CPU for display transform and draw image with 2D texture"},
{IMAGE_DRAW_METHOD_GLSL, "GLSL", 0, "GLSL", "Use GLSL shaders for display transform and draw image with 2D texture"},
@@ -4265,23 +4183,6 @@ static void rna_def_userdef_system(BlenderRNA *brna)
"Draw tool/property regions over the main region, when using Triple Buffer");
RNA_def_property_update(prop, 0, "rna_userdef_dpi_update");
-#ifdef WITH_CYCLES
- prop = RNA_def_property(srna, "compute_device_type", PROP_ENUM, PROP_NONE);
- RNA_def_property_flag(prop, PROP_ENUM_NO_CONTEXT);
- RNA_def_property_enum_sdna(prop, NULL, "compute_device_type");
- RNA_def_property_enum_items(prop, compute_device_type_items);
- RNA_def_property_enum_funcs(prop, NULL, NULL, "rna_userdef_compute_device_type_itemf");
- RNA_def_property_ui_text(prop, "Compute Device Type", "Device to use for computation (rendering with Cycles)");
- RNA_def_property_update(prop, NC_SPACE | ND_SPACE_PROPERTIES, NULL);
-
- prop = RNA_def_property(srna, "compute_device", PROP_ENUM, PROP_NONE);
- RNA_def_property_flag(prop, PROP_ENUM_NO_CONTEXT);
- RNA_def_property_enum_sdna(prop, NULL, "compute_device_id");
- RNA_def_property_enum_items(prop, compute_device_items);
- RNA_def_property_enum_funcs(prop, "rna_userdef_compute_device_get", NULL, "rna_userdef_compute_device_itemf");
- RNA_def_property_ui_text(prop, "Compute Device", "Device to use for computation");
-#endif
-
#ifdef WITH_OPENSUBDIV
prop = RNA_def_property(srna, "opensubdiv_compute_type", PROP_ENUM, PROP_NONE);
RNA_def_property_flag(prop, PROP_ENUM_NO_CONTEXT);
@@ -4291,6 +4192,14 @@ static void rna_def_userdef_system(BlenderRNA *brna)
RNA_def_property_ui_text(prop, "OpenSubdiv Compute Type", "Type of computer back-end used with OpenSubdiv");
RNA_def_property_update(prop, NC_SPACE | ND_SPACE_PROPERTIES, "rna_userdef_opensubdiv_update");
#endif
+
+#ifdef WITH_CYCLES
+ prop = RNA_def_property(srna, "legacy_compute_device_type", PROP_INT, PROP_NONE);
+ RNA_def_property_int_sdna(prop, NULL, "compute_device_type");
+ RNA_def_property_clear_flag(prop, PROP_EDITABLE);
+ RNA_def_property_flag(prop, PROP_HIDDEN);
+ RNA_def_property_ui_text(prop, "Legacy Compute Device Type", "For backwards compatibility only");
+#endif
}
static void rna_def_userdef_input(BlenderRNA *brna)
@@ -4808,6 +4717,12 @@ void RNA_def_userdef(BlenderRNA *brna)
RNA_def_property_pointer_funcs(prop, "rna_UserDef_system_get", NULL, NULL, NULL);
RNA_def_property_ui_text(prop, "System & OpenGL", "Graphics driver and operating system settings");
+ prop = RNA_def_int_vector(srna, "version", 3, NULL, 0, INT_MAX,
+ "Version", "Version of Blender the userpref.blend was saved with", 0, INT_MAX);
+ RNA_def_property_int_funcs(prop, "rna_userdef_version_get", NULL, NULL);
+ RNA_def_property_clear_flag(prop, PROP_EDITABLE);
+ RNA_def_property_flag(prop, PROP_THICK_WRAP);
+
rna_def_userdef_view(brna);
rna_def_userdef_edit(brna);
rna_def_userdef_input(brna);
diff --git a/source/blender/modifiers/intern/MOD_hook.c b/source/blender/modifiers/intern/MOD_hook.c
index 83c4ca7..9186b10 100644
--- a/source/blender/modifiers/intern/MOD_hook.c
+++ b/source/blender/modifiers/intern/MOD_hook.c
@@ -145,12 +145,9 @@ static void updateDepsgraph(ModifierData *md,
HookModifierData *hmd = (HookModifierData *)md;
if (hmd->object != NULL) {
if (hmd->subtarget[0]) {
- DEG_add_bone_relation(node, hmd->object, hmd->subtarget, DEG_OB_COMP_TRANSFORM, "Hook Modifier");
DEG_add_bone_relation(node, hmd->object, hmd->subtarget, DEG_OB_COMP_BONE, "Hook Modifier");
}
- else {
- DEG_add_object_relation(node, hmd->object, DEG_OB_COMP_TRANSFORM, "Hook Modifier");
- }
+ DEG_add_object_relation(node, hmd->object, DEG_OB_COMP_TRANSFORM, "Hook Modifier");
}
/* We need own transformation as well. */
DEG_add_object_relation(node, ob, DEG_OB_COMP_TRANSFORM, "Hook Modifier");
diff --git a/source/blender/nodes/shader/nodes/node_shader_light_path.c b/source/blender/nodes/shader/nodes/node_shader_light_path.c
index b1001cd..052f2a6 100644
--- a/source/blender/nodes/shader/nodes/node_shader_light_path.c
+++ b/source/blender/nodes/shader/nodes/node_shader_light_path.c
@@ -39,6 +39,8 @@ static bNodeSocketTemplate sh_node_light_path_out[] = {
{ SOCK_FLOAT, 0, N_("Is Transmission Ray"), 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f},
{ SOCK_FLOAT, 0, N_("Ray Length"), 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f},
{ SOCK_FLOAT, 0, N_("Ray Depth"), 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f},
+ { SOCK_FLOAT, 0, N_("Diffuse Depth"), 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f},
+ { SOCK_FLOAT, 0, N_("Glossy Depth"), 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f},
{ SOCK_FLOAT, 0, N_("Transparent Depth"), 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f},
{ SOCK_FLOAT, 0, N_("Transmission Depth"), 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f},
{ -1, 0, "" }
diff --git a/source/blender/nodes/shader/nodes/node_shader_tex_brick.c b/source/blender/nodes/shader/nodes/node_shader_tex_brick.c
index bb7f216..0be47c4 100644
--- a/source/blender/nodes/shader/nodes/node_shader_tex_brick.c
+++ b/source/blender/nodes/shader/nodes/node_shader_tex_brick.c
@@ -36,6 +36,7 @@ static bNodeSocketTemplate sh_node_tex_brick_in[] = {
{ SOCK_RGBA, 1, N_("Mortar"), 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 1.0f, PROP_NONE, SOCK_NO_INTERNAL_LINK},
{ SOCK_FLOAT, 1, N_("Scale"), 5.0f, 0.0f, 0.0f, 0.0f, -1000.0f, 1000.0f, PROP_NONE, SOCK_NO_INTERNAL_LINK},
{ SOCK_FLOAT, 1, N_("Mortar Size"), 0.02f, 0.0f, 0.0f, 0.0f, 0.0f, 0.125f, PROP_NONE, SOCK_NO_INTERNAL_LINK},
+ { SOCK_FLOAT, 1, N_("Mortar Smooth"), 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, PROP_NONE, SOCK_NO_INTERNAL_LINK},
{ SOCK_FLOAT, 1, N_("Bias"), 0.0f, 0.0f, 0.0f, 0.0f, -1.0f, 1.0f, PROP_NONE, SOCK_NO_INTERNAL_LINK},
{ SOCK_FLOAT, 1, N_("Brick Width"), 0.5f, 0.0f, 0.0f, 0.0f, 0.01f, 100.0f, PROP_NONE, SOCK_NO_INTERNAL_LINK},
{ SOCK_FLOAT, 1, N_("Row Height"), 0.25f, 0.0f, 0.0f, 0.0f, 0.01f, 100.0f, PROP_NONE, SOCK_NO_INTERNAL_LINK},
@@ -60,6 +61,12 @@ static void node_shader_init_tex_brick(bNodeTree *UNUSED(ntree), bNode *node)
tex->squash_freq = 2;
node->storage = tex;
+
+ for (bNodeSocket *sock = node->inputs.first; sock; sock = sock->next) {
+ if (STREQ(sock->name, "Mortar Smooth")) {
+ ((bNodeSocketValueFloat*)sock->default_value)->value = 0.1f;
+ }
+ }
}
static int node_shader_gpu_tex_brick(GPUMaterial *mat, bNode *node, bNodeExecData *UNUSED(execdata), GPUNodeStack *in, GPUNodeStack *out)
diff --git a/source/blender/render/extern/include/RE_pipeline.h b/source/blender/render/extern/include/RE_pipeline.h
index 509ad6f..85311bd 100644
--- a/source/blender/render/extern/include/RE_pipeline.h
+++ b/source/blender/render/extern/include/RE_pipeline.h
@@ -98,9 +98,10 @@ typedef struct RenderPass {
} RenderPass;
enum {
- RENDER_PASS_DEBUG_BVH_TRAVERSAL_STEPS = 0,
+ RENDER_PASS_DEBUG_BVH_TRAVERSED_NODES = 0,
RENDER_PASS_DEBUG_BVH_TRAVERSED_INSTANCES = 1,
RENDER_PASS_DEBUG_RAY_BOUNCES = 2,
+ RENDER_PASS_DEBUG_BVH_INTERSECTIONS = 3,
};
/* a renderlayer is a full image, but with all passes and samples */
diff --git a/source/blender/render/intern/source/render_result.c b/source/blender/render/intern/source/render_result.c
index 6ea46af..bdb3b58 100644
--- a/source/blender/render/intern/source/render_result.c
+++ b/source/blender/render/intern/source/render_result.c
@@ -550,10 +550,12 @@ RenderPass *gp_add_pass(RenderResult *rr, RenderLayer *rl, int channels, int pas
const char *RE_debug_pass_name_get(int debug_type)
{
switch (debug_type) {
- case RENDER_PASS_DEBUG_BVH_TRAVERSAL_STEPS:
- return "BVH Traversal Steps";
+ case RENDER_PASS_DEBUG_BVH_TRAVERSED_NODES:
+ return "BVH Traversed Nodes";
case RENDER_PASS_DEBUG_BVH_TRAVERSED_INSTANCES:
return "BVH Traversed Instances";
+ case RENDER_PASS_DEBUG_BVH_INTERSECTIONS:
+ return "BVH Primitive Intersections";
case RENDER_PASS_DEBUG_RAY_BOUNCES:
return "Ray Bounces";
}
diff --git a/source/blenderplayer/bad_level_call_stubs/stubs.c b/source/blenderplayer/bad_level_call_stubs/stubs.c
index d8a4ddc..6040dff 100644
--- a/source/blenderplayer/bad_level_call_stubs/stubs.c
+++ b/source/blenderplayer/bad_level_call_stubs/stubs.c
@@ -142,7 +142,6 @@ struct wmWindowManager;
# pragma GCC diagnostic ignored "-Wunused-parameter"
#endif
-#include "../../intern/cycles/blender/CCL_api.h"
#include "../../intern/dualcon/dualcon.h"
#include "../../intern/elbeem/extern/elbeem.h"
#include "../blender/blenkernel/BKE_modifier.h"
@@ -770,10 +769,6 @@ void *dualcon(const DualConInput *input_mesh,
float scale,
int depth) RET_ZERO
-/* intern/cycles */
-struct CCLDeviceInfo;
-struct CCLDeviceInfo *CCL_compute_device_list(int opencl) RET_NULL
-
/* compositor */
void COM_execute(RenderData *rd, Scene *scene, bNodeTree *editingtree, int rendering,
const ColorManagedViewSettings *viewSettings, const ColorManagedDisplaySettings *displaySettings,
diff --git a/source/creator/CMakeLists.txt b/source/creator/CMakeLists.txt
index f65688e..2df5ddc 100644
--- a/source/creator/CMakeLists.txt
+++ b/source/creator/CMakeLists.txt
@@ -713,10 +713,7 @@ elseif(WIN32)
)
if(WITH_PYTHON_INSTALL_NUMPY)
- set(PYTHON_NUMPY_VERSION 1.9)
- if(MSVC_VERSION EQUAL 1900)
- set(PYTHON_NUMPY_VERSION 1.11)
- endif()
+ set(PYTHON_NUMPY_VERSION 1.10)
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${BLENDER_VERSION}/python/lib/site-packages
COMMAND ${CMAKE_COMMAND} -E
make_directory ${CMAKE_CURRENT_BINARY_DIR}/${BLENDER_VERSION}/python/lib/site-packages)
@@ -830,11 +827,12 @@ elseif(WIN32)
else()
install(
FILES
- ${LIBDIR}/ffmpeg/lib/avcodec-55.dll
- ${LIBDIR}/ffmpeg/lib/avformat-55.dll
- ${LIBDIR}/ffmpeg/lib/avdevice-55.dll
- ${LIBDIR}/ffmpeg/lib/avutil-52.dll
- ${LIBDIR}/ffmpeg/lib/swscale-2.dll
+ ${LIBDIR}/ffmpeg/lib/avcodec-57.dll
+ ${LIBDIR}/ffmpeg/lib/avformat-57.dll
+ ${LIBDIR}/ffmpeg/lib/avdevice-57.dll
+ ${LIBDIR}/ffmpeg/lib/avutil-55.dll
+ ${LIBDIR}/ffmpeg/lib/swscale-4.dll
+ ${LIBDIR}/ffmpeg/lib/swresample-2.dll
DESTINATION "."
)
endif()
diff --git a/source/gameengine/VideoTexture/VideoDeckLink.cpp b/source/gameengine/VideoTexture/VideoDeckLink.cpp
index 4f5e348..c588a4b 100644
--- a/source/gameengine/VideoTexture/VideoDeckLink.cpp
+++ b/source/gameengine/VideoTexture/VideoDeckLink.cpp
@@ -544,12 +544,12 @@ HRESULT STDMETHODCALLTYPE PinnedMemoryAllocator::QueryInterface(REFIID /*iid*/,
ULONG STDMETHODCALLTYPE PinnedMemoryAllocator::AddRef(void)
{
- return atomic_add_uint32(&mRefCount, 1U);
+ return atomic_add_and_fetch_uint32(&mRefCount, 1U);
}
ULONG STDMETHODCALLTYPE PinnedMemoryAllocator::Release(void)
{
- uint32_t newCount = atomic_sub_uint32(&mRefCount, 1U);
+ uint32_t newCount = atomic_sub_and_fetch_uint32(&mRefCount, 1U);
if (newCount == 0)
delete this;
return (ULONG)newCount;
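The renamed atomic helpers follow add-and-fetch semantics: they return the value of the variable after the operation, which is exactly what the reference counting above depends on when it compares the result against zero; a minimal sketch (illustrative only):

    uint32_t refcount = 2;
    /* returns the post-decrement value, here 1 */
    uint32_t after = atomic_sub_and_fetch_uint32(&refcount, 1U);
    if (after == 0) {
        /* last reference gone: safe to free the owning object */
    }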
diff --git a/tests/python/cycles_render_tests.py b/tests/python/cycles_render_tests.py
index 78b4b34..fa4b3f2 100755
--- a/tests/python/cycles_render_tests.py
+++ b/tests/python/cycles_render_tests.py
@@ -15,6 +15,7 @@ def render_file(filepath):
"--background",
"-noaudio",
"--factory-startup",
+ "--enable-autoexec",
filepath,
"-E", "CYCLES",
# Run with OSL enabled
--
blender packaging