pax_global_header00006660000000000000000000000064151445337170014524gustar00rootroot0000000000000052 comment=8e14fd8cd556635450232f4ec5f6283e2a1d3ac1 level-zero-raytracing-support-1.2.3/000077500000000000000000000000001514453371700174665ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/.github/000077500000000000000000000000001514453371700210265ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/.github/workflows/000077500000000000000000000000001514453371700230635ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/.github/workflows/continuous.yml000066400000000000000000000257061514453371700260260ustar00rootroot00000000000000## Copyright 2022 Intel Corporation ## SPDX-License-Identifier: Apache-2.0 name: continuous permissions: read-all on: [push, workflow_dispatch] jobs: ze_raytracing-ubuntu22_04-GCC: secrets: inherit uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker.yml@main with: image: embree/ubuntu:22.04 runs-on: '[ "Linux", "docker", "build" ]' cmd: | mkdir build cd build cmake -G Ninja -D CMAKE_CXX_COMPILER=g++ -D CMAKE_C_COMPILER=gcc -D CMAKE_BUILD_TYPE=Release .. cmake --build . --target package ze_raytracing-linux-DPCPP-test: secrets: inherit uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker.yml@main with: image: embree/ubuntu:22.04 runs-on: '[ "Linux", "docker", "build" ]' env-from-files: ./.github/workflows/dpcpp-sycl-nightly.env artifact-out: ze_raytracing-linux-DPCPP-test artifact-path: ./build/intel-level-zero-gpu-raytracing*.tar.gz cmd: | mkdir build cd build cmake -G Ninja -D CMAKE_BUILD_TYPE=ReleaseInternal -D CMAKE_CXX_COMPILER=clang++ -D CMAKE_C_COMPILER=clang -D ZE_RAYTRACING_TBB=build_static -D ZE_RAYTRACING_SYCL_TESTS=INTERNAL_RTAS_BUILDER .. cmake --build . 
--target package ze_raytracing-linux-DPCPP-test-DG2: secrets: inherit uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker.yml@main needs: ["ze_raytracing-linux-DPCPP-test"] with: image: embree/ubuntu:22.04 options: --device=/dev/dri:/dev/dri runs-on: '[ "Linux", "docker", "dg2" ]' env-from-files: ./.github/workflows/gfx-ubuntu22-internal.env artifact-in: ze_raytracing-linux-DPCPP-test cmd: | cd build tar xzf intel-level-zero-gpu-raytracing*.x86_64.linux.tar.gz cd intel-level-zero-gpu-raytracing*.x86_64.linux ./run.sh ./run_ext.sh cd bin ctest -VV ze_raytracing-linux-DPCPP-test-PVC: secrets: inherit uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker.yml@main needs: ["ze_raytracing-linux-DPCPP-test"] with: image: embree/ubuntu:22.04 options: --device=/dev/dri:/dev/dri runs-on: '[ "Linux", "docker", "pvc" ]' env-from-files: ./.github/workflows/gfx-ubuntu22-internal.env artifact-in: ze_raytracing-linux-DPCPP-test cmd: | cd build tar xzf intel-level-zero-gpu-raytracing*.x86_64.linux.tar.gz cd intel-level-zero-gpu-raytracing*.x86_64.linux ./run.sh ./run_ext.sh cd bin ctest -VV ze_raytracing-linux-ICX-test: secrets: inherit uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker.yml@main with: image: embree/ubuntu:22.04 runs-on: '[ "Linux", "docker", "build" ]' dpcpp-version: intel/2025.2.1.7 artifact-out: ze_raytracing-linux-ICX-test artifact-path: ./build/intel-level-zero-gpu-raytracing*.tar.gz cmd: | mkdir build cd build cmake -G Ninja -D CMAKE_BUILD_TYPE=ReleaseInternal -D CMAKE_CXX_COMPILER=icpx -D CMAKE_C_COMPILER=icx -D ZE_RAYTRACING_TBB=inject_headers -D ZE_RAYTRACING_SYCL_TESTS=INTERNAL_RTAS_BUILDER .. cmake --build . 
--target package ze_raytracing-linux-ICX-test-DG2: secrets: inherit uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker.yml@main needs: ["ze_raytracing-linux-ICX-test"] with: image: embree/ubuntu:22.04 options: --device=/dev/dri:/dev/dri runs-on: '[ "Linux", "docker", "dg2" ]' env-from-files: ./.github/workflows/gfx-ubuntu22-internal.env artifact-in: ze_raytracing-linux-ICX-test cmd: | cd build tar xzf intel-level-zero-gpu-raytracing*.x86_64.linux.tar.gz cd intel-level-zero-gpu-raytracing*.x86_64.linux ./run.sh ./run_ext.sh cd bin LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:. ctest -VV ze_raytracing-linux-ICX-test-level-zero: secrets: inherit uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker.yml@main with: image: embree/ubuntu:22.04 runs-on: '[ "Linux", "docker", "build" ]' dpcpp-version: intel/2025.2.1.7 artifact-out: ze_raytracing-linux-ICX-test-level-zero artifact-path: ./build/intel-level-zero-gpu-raytracing*.tar.gz cmd: | mkdir build cd build cmake -G Ninja -D CMAKE_BUILD_TYPE=ReleaseInternal -D CMAKE_CXX_COMPILER=icpx -D CMAKE_C_COMPILER=icx -D ZE_RAYTRACING_TBB=normal -D ZE_RAYTRACING_SYCL_TESTS=LEVEL_ZERO_RTAS_BUILDER .. cmake --build . --target package ze_raytracing-linux-ICX-test-level-zero-DG2: secrets: inherit uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker.yml@main needs: ["ze_raytracing-linux-ICX-test-level-zero"] with: image: embree/ubuntu:22.04 options: --device=/dev/dri:/dev/dri runs-on: '[ "Linux", "docker", "dg2" ]' env-from-files: ./.github/workflows/gfx-ubuntu22-internal.env artifact-in: ze_raytracing-linux-ICX-test-level-zero cmd: | cd build tar xzf intel-level-zero-gpu-raytracing*.x86_64.linux.tar.gz cd intel-level-zero-gpu-raytracing*.x86_64.linux ./run.sh ./run_ext.sh cd bin LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:. 
ctest -VV ze_raytracing-windows-VS2022-Debug: secrets: inherit uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/windows.yml@main with: runs-on: '[ "Windows", "build" ]' cmd: | mkdir build cd build cmake -G "Visual Studio 17 2022" -A "x64" .. cmake --build . --config Debug --target package ze_raytracing-windows-VS2022-Release: secrets: inherit uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/windows.yml@main with: runs-on: '[ "Windows", "build" ]' cmd: | mkdir build cd build cmake -G "Visual Studio 17 2022" -A "x64" .. cmake --build . --config Release --target package ze_raytracing-windows-VS2022-ReleaseInternal: secrets: inherit uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/windows.yml@main with: runs-on: '[ "Windows", "build" ]' cmd: | mkdir build cd build cmake -G "Visual Studio 17 2022" -A "x64" .. cmake --build . --config ReleaseInternal --target package ze_raytracing-windows-test: secrets: inherit uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/windows.yml@main with: runs-on: '[ "Windows", "build" ]' env-from-files: ./.github/workflows/dpcpp-sycl-nightly.env artifact-path: ./build/intel-level-zero-gpu-raytracing*.zip artifact-out: ze_raytracing-windows-test cmd: | mkdir build cd build cmake -G Ninja -D CMAKE_BUILD_TYPE=ReleaseInternal -D CMAKE_CXX_COMPILER=clang++ -D CMAKE_C_COMPILER=clang -D ZE_RAYTRACING_SYCL_TESTS=INTERNAL_RTAS_BUILDER .. cmake --build . 
--target package ze_raytracing-windows-test-DG2: secrets: inherit uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/windows.yml@main needs: ["ze_raytracing-windows-test"] with: runs-on: '[ "Windows", "dg2" ]' env-from-files: ./.github/workflows/gfx-windows-internal.env artifact-in: ze_raytracing-windows-test cmd: | cd build unzip intel-level-zero-gpu-raytracing*.x64.windows.zip cd intel-level-zero-gpu-raytracing*.x64.windows .\run.bat if ($LASTEXITCODE -ne 0) { throw "Command failed" } cd bin ctest -VV if ($LASTEXITCODE -ne 0) { throw "Command failed" } ze_raytracing-windows-test-level-zero: secrets: inherit uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/windows.yml@main with: runs-on: '[ "Windows", "build" ]' env-from-files: ./.github/workflows/dpcpp-sycl-nightly.env artifact-path: ./build/intel-level-zero-gpu-raytracing*.zip artifact-out: ze_raytracing-windows-test-level-zero cmd: | mkdir build cd build cmake -G Ninja -D CMAKE_BUILD_TYPE=ReleaseInternal -D CMAKE_CXX_COMPILER=clang++ -D CMAKE_C_COMPILER=clang -D ZE_RAYTRACING_SYCL_TESTS=LEVEL_ZERO_RTAS_BUILDER .. cmake --build . 
--target package ze_raytracing-windows-test-level-zero-DG2: secrets: inherit uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/windows.yml@main needs: ["ze_raytracing-windows-test-level-zero"] with: runs-on: '[ "Windows", "dg2" ]' env-from-files: ./.github/workflows/gfx-windows-internal.env artifact-in: ze_raytracing-windows-test-level-zero cmd: | cd build unzip intel-level-zero-gpu-raytracing*.x64.windows.zip cd intel-level-zero-gpu-raytracing*.x64.windows .\run.bat if ($LASTEXITCODE -ne 0) { throw "Command failed" } cd bin ctest -VV if ($LASTEXITCODE -ne 0) { throw "Command failed" } static-analysis-coverity-linux: secrets: inherit uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/static_analysis.yml@main with: project: "Level Zero Ray Tracing Support" server: prod4 prebuild: | cmake -S . -B build -D CMAKE_CXX_COMPILER=g++ -D CMAKE_C_COMPILER=gcc -D CMAKE_BUILD_TYPE=Release -D ZE_RAYTRACING_TBB=build_static build: cmake --build build os: Linux static-analysis-coverity-windows: secrets: inherit uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/static_analysis.yml@main with: project: "Level Zero Ray Tracing Support" server: prod4 prebuild: | cmake -S . 
-B build -D CMAKE_CXX_COMPILER=g++ -D CMAKE_C_COMPILER=gcc -D CMAKE_BUILD_TYPE=Release -D ZE_RAYTRACING_TBB=build_static build: cmake --build build os: Windows success: runs-on: ubuntu-latest needs: - ze_raytracing-ubuntu22_04-GCC - ze_raytracing-linux-DPCPP-test - ze_raytracing-linux-DPCPP-test-DG2 - ze_raytracing-linux-DPCPP-test-PVC - ze_raytracing-linux-ICX-test - ze_raytracing-linux-ICX-test-DG2 - ze_raytracing-linux-ICX-test-level-zero - ze_raytracing-linux-ICX-test-level-zero-DG2 - ze_raytracing-windows-VS2022-Debug - ze_raytracing-windows-VS2022-Release - ze_raytracing-windows-VS2022-ReleaseInternal - ze_raytracing-windows-test - ze_raytracing-windows-test-DG2 - ze_raytracing-windows-test-level-zero - ze_raytracing-windows-test-level-zero-DG2 - static-analysis-coverity-linux - static-analysis-coverity-windows if: failure() || cancelled() steps: - name: Failure run: | echo "::notice title=Success::Workflow failed" exit 1 level-zero-raytracing-support-1.2.3/.github/workflows/dpcpp-sycl-nightly.env000066400000000000000000000000541514453371700273260ustar00rootroot00000000000000DPCPP_VERSION=intel-llvm/nightly-2024-06-10 level-zero-raytracing-support-1.2.3/.github/workflows/gfx-ubuntu22-internal.env000066400000000000000000000001321514453371700276530ustar00rootroot00000000000000GFX_DRIVER_VERSION=neo-builds/ci/master/ci-neo-master-035585/artifacts/linux/ubuntu/22.04 level-zero-raytracing-support-1.2.3/.github/workflows/gfx-windows-internal.env000066400000000000000000000001341514453371700276610ustar00rootroot00000000000000GFX_DRIVER_VERSION=gfx-driver-builds/ci/master/gfx-driver-ci-master-19835/artifacts/Windows level-zero-raytracing-support-1.2.3/.github/workflows/release.yml000066400000000000000000000051661514453371700252360ustar00rootroot00000000000000## Copyright 2022 Intel Corporation ## SPDX-License-Identifier: Apache-2.0 name: release permissions: read-all on: [push, workflow_dispatch] jobs: release-ze_raytracing-ubuntu22_04-GCC: secrets: inherit uses: 
intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/docker.yml@main with: image: embree/ubuntu:22.04 runs-on: '[ "Linux", "docker", "build" ]' artifact-out: release-ze_raytracing-ubuntu22_04-GCC artifact-path: ./build cmd: | mkdir build cd build cmake -G Ninja -D CMAKE_CXX_COMPILER=g++ -D CMAKE_C_COMPILER=gcc -D CMAKE_BUILD_TYPE=ReleaseInternal .. cmake --build . --target package release-ze_raytracing-windows-VS2022: secrets: inherit uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/windows.yml@main with: runs-on: '[ "Windows", "build" ]' artifact-path: ./build artifact-out: release-ze_raytracing-windows-VS2022 cmd: | mkdir build cd build cmake -G "Visual Studio 17 2022" -A "x64" .. cmake --build . --config ReleaseInternal --target package source-code-analysis: secrets: inherit uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/source_analysis.yml@main with: project-name: c_levelzeroraytracingsupport_26693 path-to-src: . 
binary-analysis: needs: - release-ze_raytracing-ubuntu22_04-GCC - release-ze_raytracing-windows-VS2022 secrets: inherit uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/binary_analysis.yml@main with: project: embree artifact-in-windows: release-ze_raytracing-windows-VS2022 artifact-in-linux: release-ze_raytracing-ubuntu22_04-GCC path: "build/*.zip build/*.gz" antivirus-scan: needs: - release-ze_raytracing-ubuntu22_04-GCC - release-ze_raytracing-windows-VS2022 secrets: inherit uses: intel-innersource/libraries.devops.renderkit.workflows/.github/workflows/antivirus_scan.yml@main with: project: embree artifact-in-windows: release-ze_raytracing-windows-VS2022 artifact-in-linux: release-ze_raytracing-ubuntu22_04-GCC path: "build/*.zip build/*.gz" success-release: runs-on: ubuntu-latest needs: - release-ze_raytracing-ubuntu22_04-GCC - release-ze_raytracing-windows-VS2022 - binary-analysis - antivirus-scan - source-code-analysis if: failure() || cancelled() steps: - name: Failure run: | echo "::notice title=Success::Workflow failed" exit 1 level-zero-raytracing-support-1.2.3/CHANGELOG.md000066400000000000000000000017671514453371700213120ustar00rootroot00000000000000Version History --------------- ### oneAPI Level Zero Ray Tracing Support 1.2.3 - Updating default TBB version for static TBB build to v2022.3.0. - Fixed potential global variable initialization order issue when linking TBB statically. - Moved packaging files to separate packaging folder. ### oneAPI Level Zero Ray Tracing Support 1.2.2 - Added headers for TBB 2021.6.0 and avoiding fetching headers of that TBB version. ### oneAPI Level Zero Ray Tracing Support 1.2.1 - Added support to specify TBB headers to use for compilation ### oneAPI Level Zero Ray Tracing Support 1.2.0 - Added support for Level Zero Extension ZE_extension_rtas - Updated to Level Zero API header 1.13.1 - Fixed wrong assertion that triggered when using device memory for RTAS. 
- Fixed compile issues when AVX was enabled. ### oneAPI Level Zero Ray Tracing Support 1.1.0 - Added support for PTL RTAS layout. ### oneAPI Level Zero Ray Tracing Support 1.0.0 - Initial implementation of oneAPI Level Zero Ray Tracing Support library level-zero-raytracing-support-1.2.3/CMakeLists.txt000066400000000000000000000201411514453371700222240ustar00rootroot00000000000000## Copyright 2009-2021 Intel Corporation ## SPDX-License-Identifier: Apache-2.0 cmake_minimum_required(VERSION 3.1.0) project(ze_raytracing) SET(ZE_RAYTRACING_VERSION_MAJOR 1) SET(ZE_RAYTRACING_VERSION_MINOR 2) SET(ZE_RAYTRACING_VERSION_PATCH 3) SET(ZE_RAYTRACING_VERSION ${ZE_RAYTRACING_VERSION_MAJOR}.${ZE_RAYTRACING_VERSION_MINOR}.${ZE_RAYTRACING_VERSION_PATCH}) IF (ZE_RAYTRACING_VERSION_BUILD) SET(ZE_RAYTRACING_VERSION ${ZE_RAYTRACING_VERSION}.${ZE_RAYTRACING_VERSION_BUILD}) ENDIF() IF(COMMAND cmake_policy) if(POLICY CMP0135) message("set policy CMP0135 to NEW") cmake_policy(SET CMP0135 NEW) endif() ENDIF() SET(CMAKE_CXX_STANDARD 17) SET(EMBREE_CMAKEEXPORT_DIR "cmake") SET(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}") SET(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}") SET(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}") SET(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH}) # Build configurations to use SET(CMAKE_BUILD_TYPE "ReleaseInternal" CACHE STRING "Specifies the build type.") SET_PROPERTY(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS Debug Release ReleaseInternal) SET(CMAKE_CONFIGURATION_TYPES "Debug;Release;ReleaseInternal" CACHE STRING "List of generated configurations." 
FORCE) # Debug build configuration IF (WIN32) # Link runtime statically under Windows STRING(REGEX REPLACE "/MD" "/MT" CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}") STRING(REGEX REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}") ENDIF() MESSAGE("CMAKE_CXX_FLAGS_DEBUG = ${CMAKE_CXX_FLAGS_DEBUG}") MESSAGE("CMAKE_C_FLAGS_DEBUG = ${CMAKE_C_FLAGS_DEBUG}") MESSAGE("CMAKE_SHARED_LINKER_FLAGS_DEBUG = ${CMAKE_SHARED_LINKER_FLAGS_DEBUG}") MESSAGE("CMAKE_EXE_LINKER_FLAGS_DEBUG = ${CMAKE_EXE_LINKER_FLAGS_DEBUG}") # Release build configuration SET(CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}) SET(CMAKE_C_FLAGS_RELEASE ${CMAKE_C_FLAGS_RELWITHDEBINFO}) SET(CMAKE_SHARED_LINKER_FLAGS_RELEASE ${CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO}) SET(CMAKE_EXE_LINKER_FLAGS_RELEASE ${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO}) IF (WIN32) # Link runtime statically under Windows STRING(REGEX REPLACE "/MD" "/MT" CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}") STRING(REGEX REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}") ENDIF() IF (WIN32) # enable dynamic control flow guard mitigation under windows SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /guard:cf") SET(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /guard:cf") SET(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DynamicBase /guard:cf") SET(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /DynamicBase /guard:cf") ENDIF() MESSAGE("CMAKE_CXX_FLAGS_RELEASE = ${CMAKE_CXX_FLAGS_RELEASE}") MESSAGE("CMAKE_C_FLAGS_RELEASE = ${CMAKE_C_FLAGS_RELEASE}") MESSAGE("CMAKE_SHARED_LINKER_FLAGS_RELEASE = ${CMAKE_SHARED_LINKER_FLAGS_RELEASE}") MESSAGE("CMAKE_EXE_LINKER_FLAGS_RELEASE = ${CMAKE_EXE_LINKER_FLAGS_RELEASE}") # ReleaseInternal build configuration string(REPLACE "DNDEBUG" "DDEBUG" CMAKE_CXX_FLAGS_RELEASEINTERNAL "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}") string(REPLACE "DNDEBUG" "DDEBUG" CMAKE_C_FLAGS_RELEASEINTERNAL 
"${CMAKE_C_FLAGS_RELWITHDEBINFO}") SET(CMAKE_SHARED_LINKER_FLAGS_RELEASEINTERNAL ${CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO}) SET(CMAKE_EXE_LINKER_FLAGS_RELEASEINTERNAL ${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO}) IF (WIN32) # Link runtime statically under Windows STRING(REGEX REPLACE "/MD" "/MT" CMAKE_C_FLAGS_RELEASEINTERNAL "${CMAKE_C_FLAGS_RELEASEINTERNAL}") STRING(REGEX REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_RELEASEINTERNAL "${CMAKE_CXX_FLAGS_RELEASEINTERNAL}") ENDIF() MESSAGE("CMAKE_CXX_FLAGS_RELEASEINTERNAL = ${CMAKE_CXX_FLAGS_RELEASEINTERNAL}") MESSAGE("CMAKE_C_FLAGS_RELEASEINTERNAL = ${CMAKE_C_FLAGS_RELEASEINTERNAL}") MESSAGE("CMAKE_SHARED_LINKER_FLAGS_RELEASEINTERNAL = ${CMAKE_SHARED_LINKER_FLAGS_RELEASEINTERNAL}") MESSAGE("CMAKE_EXE_LINKER_FLAGS_RELEASEINTERNAL = ${CMAKE_EXE_LINKER_FLAGS_RELEASEINTERNAL}") # configure resource file file CONFIGURE_FILE( "${PROJECT_SOURCE_DIR}/level_zero_raytracing.rc.in" "${PROJECT_SOURCE_DIR}/level_zero_raytracing.rc" ) # default TBB mode SET(ZE_RAYTRACING_TBB_DEFAULT "build_static") # still support ZE_RAYTRACING_TBB_STATIC if defined IF (DEFINED ZE_RAYTRACING_TBB_STATIC) IF(ZE_RAYTRACING_TBB_STATIC) SET(ZE_RAYTRACING_TBB_DEFAULT "build_static") ELSE() SET(ZE_RAYTRACING_TBB_DEFAULT "normal") ENDIF() ENDIF() # TBB mode configuration SET(ZE_RAYTRACING_TBB ${ZE_RAYTRACING_TBB_DEFAULT} CACHE STRING "Configures how to use TBB: build_static builds specified TBB version and links statically, inject_headers uses headers of specified TBB version, and normal links against system provided TBB)") SET_PROPERTY(CACHE ZE_RAYTRACING_TBB PROPERTY STRINGS "build_static" "inject_header" "normal") SET(ZE_RAYTRACING_DEFAULT_TBB_VERSION v2022.3.0) IF (ZE_RAYTRACING_TBB STREQUAL "build_static") SET(TBB_STATIC ON) SET(TBB_HEADER OFF) ELSEIF (ZE_RAYTRACING_TBB STREQUAL "inject_headers") SET(TBB_STATIC OFF) SET(TBB_HEADER ON) SET(ZE_RAYTRACING_DEFAULT_TBB_VERSION v2021.6.0) # headers of that version are included ELSEIF (ZE_RAYTRACING_TBB STREQUAL 
"normal") SET(TBB_STATIC OFF) SET(TBB_HEADER OFF) ELSE() MESSAGE(FATAL_ERROR "Unknown TBB mode ${ZE_RAYTRACING_TBB}") ENDIF() SET(ZE_RAYTRACING_TBB_VERSION ${ZE_RAYTRACING_DEFAULT_TBB_VERSION} CACHE STRING "TBB version to use.") IF (TBB_STATIC OR TBB_HEADER) INCLUDE(fetchtbb) ENDIF() IF (NOT TBB_STATIC) FIND_PACKAGE(TBB) IF (NOT TBB_FOUND) find_package(PkgConfig REQUIRED) pkg_check_modules(TBB REQUIRED tbb) IF(NOT TARGET TBB::tbb) add_library(TBB::tbb INTERFACE IMPORTED) target_link_libraries(TBB::tbb INTERFACE ${TBB_LIBRARIES}) target_include_directories(TBB::tbb INTERFACE ${TBB_INCLUDE_DIRS}) set(TBB_VERSION ${TBB_VERSION}) ENDIF() ENDIF() message(STATUS "Found TBB version: ${TBB_VERSION}") ENDIF() ADD_DEFINITIONS(-DTASKING_TBB) # add path to TBB headers to use for compilation IF (ZE_RAYTRACING_TBB_HEADER_DIR) INCLUDE_DIRECTORIES(${ZE_RAYTRACING_TBB_HEADER_DIR}) IF (NOT EXISTS "${ZE_RAYTRACING_TBB_HEADER_DIR}/tbb/tbb.h") MESSAGE(FATAL_ERROR "TBB headers not found at ${ZE_RAYTRACING_TBB_HEADER_DIR}") ENDIF() MESSAGE(STATUS "Using TBB headers ${ZE_RAYTRACING_TBB_HEADER_DIR}") ENDIF() OPTION(ZE_RAYTRACING_RT_SIMULATION "Using hardware simulation" OFF) IF (ZE_RAYTRACING_RT_SIMULATION AND (NOT ZE_RAYTRACING_RT_VALIDATION_API OR ZE_RAYTRACING_IMPLICIT_DISPATCH_GLOBALS)) MESSAGE(FATAL_ERROR "Using ZE_RAYTRACING_RT_SIMULATION requires ZE_RAYTRACING_RT_VALIDATION_API=ON and ZE_RAYTRACING_IMPLICIT_DISPATCH_GLOBALS=OFF") ENDIF() IF (ZE_RAYTRACING_RT_SIMULATION) FIND_PACKAGE(rtcore) ADD_DEFINITIONS("-DEMBREE_SYCL_RT_SIMULATION") ENDIF() OPTION(ZE_RAYTRACING_RT_VALIDATION_API "Use rt_validation API instead of IGC provided rt_production API" OFF) IF (ZE_RAYTRACING_RT_VALIDATION_API) ADD_DEFINITIONS("-DEMBREE_SYCL_RT_VALIDATION_API") ENDIF() SET(ZE_RAYTRACING_DEVICE -1 CACHE STRING "Forces Xe device to use.") ADD_DEFINITIONS("-DZE_RAYTRACING_DEVICE=${ZE_RAYTRACING_DEVICE}") OPTION(ZE_RAYTRACING_IMPLICIT_DISPATCH_GLOBALS "Using L0 allocated Dispatch Globals" ON) IF (NOT 
ZE_RAYTRACING_IMPLICIT_DISPATCH_GLOBALS) ADD_DEFINITIONS("-DEMBREE_SYCL_ALLOC_DISPATCH_GLOBALS") ENDIF() SET(ZE_RAYTRACING_SYCL_TESTS "OFF" CACHE STRING "Enable SYCL tests.") SET_PROPERTY(CACHE ZE_RAYTRACING_SYCL_TESTS PROPERTY STRINGS OFF INTERNAL_RTAS_BUILDER LEVEL_ZERO_RTAS_BUILDER) STRING(TOLOWER "${CMAKE_CXX_COMPILER_ID}" LOWER_CXX_COMPILER_ID) include(${LOWER_CXX_COMPILER_ID}) INCLUDE(CTest) include(package) INCLUDE(CPack) IF (ZE_RAYTRACING_RT_VALIDATION_API) ADD_SUBDIRECTORY(rttrace) SET(EMBREE_RTHWIF_SYCL embree_rthwif_sycl) ENDIF() ADD_SUBDIRECTORY(rtbuild/sys) ADD_SUBDIRECTORY(rtbuild/simd) ADD_SUBDIRECTORY(level_zero) IF (NOT ZE_RAYTRACING_SYCL_TESTS STREQUAL "LEVEL_ZERO_RTAS_BUILDER") # do not build RTAS builder when building LEVEL_ZERO test ADD_SUBDIRECTORY(rtbuild) ENDIF() IF (NOT ZE_RAYTRACING_SYCL_TESTS STREQUAL "OFF") ADD_SUBDIRECTORY(testing) ENDIF() level-zero-raytracing-support-1.2.3/LICENSE.txt000066400000000000000000000261361514453371700213210ustar00rootroot00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. 
"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." 
"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. level-zero-raytracing-support-1.2.3/README.md000066400000000000000000000126711514453371700207540ustar00rootroot00000000000000 # oneAPI Level Zero Ray Tracing Support ## Introduction The oneAPI Level Zero Ray Tracing Support library is an open source project used by Intel(R) oneAPI Level Zero as the implementation of the RTAS extension [ZE_extension_rtas](https://oneapi-src.github.io/level-zero-spec/level-zero/latest/core/EXT_RTAS.html#ze-extension-rtas) and deprecated experimental RTAS builder extension [ZE_experimental_rtas_builder](https://oneapi-src.github.io/level-zero-spec/level-zero/latest/core/EXT_Exp_RTASBuilder.html#ze-experimental-rtas-builder). The library implements high performance CPU based ray tracing acceleration structure (RTAS) construction algorithms that take input from the above mentioned APIs and produce an RTAS that is compatible with the ray tracing hardware of Intel GPUs. ## License The oneAPI Level Zero Ray Tracing Support library is distributed under the [Apache 2.0 license](https://opensource.org/license/apache-2-0). ## Supported Platforms |Platform|Supported |--------|:----:| |Alchemist| Y | |Meteor Lake| Y | |Arrow Lake| Y | |Battlemage| Y | |Lunar Lake| Y | |Panther Lake| Y | _No code changes may be introduced that would regress support for any currently supported hardware. All contributions must ensure continued compatibility and functionality across all supported hardware platforms. 
Failure to maintain hardware compatibility may result in the rejection or reversion of the contribution. Any deliberate modifications or removal of hardware support will be transparently communicated in the release notes._ _Debug parameters, environmental variables, and internal data structures are considered as internal implementation detail and may be changed or removed at any time._ ## Installation The oneAPI Level Zero Ray Tracing Support library is available for installation on a variety of Linux distributions and can be installed via the distro's package manager. For example on Ubuntu* 22.04: ``` apt-get install libze-intel-gpu-raytracing ``` ## Compilation To compile the library under Linux execute: ``` cmake -B build -G Ninja -D CMAKE_BUILD_TYPE=Release . cmake --build build --target package ``` To compile the library under Windows execute: ``` cmake -B build -G "Visual Studio 17 2022" -A "x64" -D CMAKE_BUILD_TYPE=Release . cmake --build build --target package ``` To compile the library under Linux including SYCL tests: ``` wget https://github.com/intel/llvm/releases/download/sycl-nightly%2F20230304/dpcpp-compiler.tar.gz tar xzf dpcpp-compiler source dpcpp_compiler/startup.sh cmake -B build -G Ninja -D CMAKE_CXX_COMPILER=clang++ -D CMAKE_C_COMPILER=clang -D CMAKE_BUILD_TYPE=Release -D ZE_RAYTRACING_SYCL_TESTS=INTERNAL_RTAS_BUILDER . cmake --build build --target package cd build ctest ``` You can configure how TBB is used by setting the ZE_RAYTRACING_TBB cmake variable and ZE_RAYTRACING_TBB_VERSION: * build_static: build a static library of provided TBB version and link that static library (default) * inject_headers: link against system provided TBB version, but use headers of specified TBB version * normal: link against system provided TBB version as normal, using system provided TBB headers Dynamic linking against new TBB versions may cause the library to use new TBB symbols, making it incompatible with older TBB versions. 
This has been an issue with applications that ship with an older TBB version. To have the library be compatible with older TBB versions you can use the inject_headers mode to force an older TBB interface version to be used: ``` cmake -B build -G Ninja -D CMAKE_BUILD_TYPE=Release -D ZE_RAYTRACING_TBB=inject_headers -D ZE_RAYTRACING_TBB_VERSION=v2021.6.0 . cmake --build build --target package ``` Some TBB headers are included with the project and do not trigger a download, other TBB headers and sources will get downloaded automatically. You can avoid downloading TBB by putting your own TBB sources (or headers) into a folder called tbb in the main repository folder. ``` cp -r tbb-src tbb cmake -B build -G Ninja -D CMAKE_BUILD_TYPE=Release -D ZE_RAYTRACING_TBB=build_static . cmake --build build --target package ``` ## Linking applications Directly linking to the oneAPI Level Zero Ray Tracing Support library is not supported. Level Zero applications should link with [Level Zero Loader](https://github.com/oneapi-src/level-zero) and use the [ZE_extension_rtas](https://oneapi-src.github.io/level-zero-spec/level-zero/latest/core/EXT_RTAS.html#ze-extension-rtas) API. ## Dependencies * Intel(R) oneAPI Threading Building Blocks [TBB](https://github.com/uxlfoundation/oneTBB). ## How to provide feedback Please submit an issue using github.com [interface](https://github.com/intel/level-zero-raytracing-support/issues). ## How to contribute Create a [pull request](https://github.com/intel/level-zero-raytracing-support/pulls) on github.com with your patch. A maintainer will contact you if there are questions or concerns. 
## See also * [oneAPI Level Zero RTAS extension](https://oneapi-src.github.io/level-zero-spec/level-zero/latest/core/EXT_RTAS.html#ze-extension-rtas) * [oneAPI Level Zero experimental RTAS builder extension](https://oneapi-src.github.io/level-zero-spec/level-zero/latest/core/EXT_Exp_RTASBuilder.html#ze-experimental-rtas-builder) * [Intel(R) OneApi Level Zero Specification API C/C++ header files](https://github.com/oneapi-src/level-zero/) ___(*) Other names and brands may be claimed as property of others.___ level-zero-raytracing-support-1.2.3/SECURITY.md000066400000000000000000000010101514453371700212470ustar00rootroot00000000000000Security Policy =============== Intel is committed to rapidly addressing security vulnerabilities affecting our customers and providing clear guidance on the solution, impact, severity and mitigation. Reporting a Vulnerability ------------------------- Please [report any security vulnerabilities][guidelines] in this project utilizing the [guidelines here][guidelines]. 
[guidelines]: https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html "Vulnerability Handling Guidelines" level-zero-raytracing-support-1.2.3/cmake/000077500000000000000000000000001514453371700205465ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/cmake/clang.cmake000066400000000000000000000046471514453371700226470ustar00rootroot00000000000000## Copyright 2009-2021 Intel Corporation ## SPDX-License-Identifier: Apache-2.0 SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") # enables most warnings SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wformat -Wformat-security") # enables string format vulnerability warnings #SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-class-memaccess") # disables clearing an object of type ‘XXX’ with no trivial copy-assignment; use assignment or value-initialization instead #SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-strict-overflow") # assume that signed overflow occurs SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-delete-null-pointer-checks") # keep all checks for NULL pointers SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fwrapv") # this option instructs the compiler to assume that signed arithmetic overflow warps around. 
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsigned-char") # treat char as signed on all processors, including ARM IF (NOT WIN32) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIE") # enables support for more secure position independent execution SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") # generate position independent code suitable for shared libraries ENDIF() SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-strict-aliasing") # disables strict aliasing rules SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_FORTIFY_SOURCE=2") # perform extra security checks for some standard library calls SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstack-protector") # protects against return address overrides IF (NOT WIN32) SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-undefined") # issues link error for undefined symbols in shared library SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -z noexecstack") # we do not need an executable stack SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -z relro -z now") # re-arranges data sections to increase security SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -z noexecstack") # we do not need an executable stack SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -z relro -z now") # re-arranges data sections to increase security SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pie") # enables position independent execution for executable ENDIF() level-zero-raytracing-support-1.2.3/cmake/fetchtbb.cmake000066400000000000000000000066461514453371700233450ustar00rootroot00000000000000## Copyright 2009-2021 Intel Corporation ## SPDX-License-Identifier: Apache-2.0 #################################################################### # fetch TBB and build static version of it IF (NOT ZE_RAYTRACING_TBB_GIT_REPOSITORY) # allow setting this externally SET(ZE_RAYTRACING_TBB_GIT_REPOSITORY "https://github.com/oneapi-src/oneTBB.git") ENDIF() IF (WIN32) option(TBB_STRICT "Treat compiler warnings as errors" 
OFF) ENDIF() option(TBB_TEST "Enable testing" OFF) option(TBBMALLOC_BUILD "Enable tbbmalloc build" OFF) SET(TBB_DIR OFF) SET(BUILD_SHARED_LIBS OFF) # handle tbb directory with highest priority IF (EXISTS ${PROJECT_SOURCE_DIR}/tbb) IF (NOT TBB_HEADER) add_subdirectory(${PROJECT_SOURCE_DIR}/tbb ${CMAKE_BINARY_DIR}/tbb EXCLUDE_FROM_ALL) ELSE() SET(ZE_RAYTRACING_TBB_HEADER_DIR "${PROJECT_SOURCE_DIR}/tbb/include") ENDIF() # next check if TBB headers are to be used and are present ELSEIF (TBB_HEADER AND EXISTS ${PROJECT_SOURCE_DIR}/external/tbb/${ZE_RAYTRACING_TBB_VERSION}) MESSAGE(STATUS "Headers for TBB ${ZE_RAYTRACING_TBB_VERSION} are available.") SET(ZE_RAYTRACING_TBB_HEADER_DIR "${PROJECT_SOURCE_DIR}/external/tbb/${ZE_RAYTRACING_TBB_VERSION}/include") # otherwise download TBB ELSE() INCLUDE(FetchContent) SET(FETCHCONTENT_QUIET OFF) FetchContent_Declare( tbb_static GIT_REPOSITORY ${ZE_RAYTRACING_TBB_GIT_REPOSITORY} GIT_TAG ${ZE_RAYTRACING_TBB_VERSION} ) FetchContent_GetProperties(tbb_static) if(NOT tbb_static_POPULATED) FetchContent_Populate(tbb_static) # patch clang.cmake after fetch as WA for https://github.com/uxlfoundation/oneTBB/issues/1741 if(WIN32 AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND ZE_RAYTRACING_TBB_VERSION VERSION_GREATER_EQUAL "v2021.10.0" AND ZE_RAYTRACING_TBB_VERSION VERSION_LESS_EQUAL "v2022.3.0") file(READ "${tbb_static_SOURCE_DIR}/cmake/compilers/Clang.cmake" TBB_CLANG_CONTENT) string(REPLACE "-fPIC" "" TBB_CLANG_CONTENT "${TBB_CLANG_CONTENT}") file(WRITE "${tbb_static_SOURCE_DIR}/cmake/compilers/Clang.cmake" "${TBB_CLANG_CONTENT}") endif() # We want to build tbb_static to link it into embree_rthwif, but don't want to # install it as part of the Embree install targets. 
IF (NOT TBB_HEADER) add_subdirectory(${tbb_static_SOURCE_DIR} ${tbb_static_BINARY_DIR} EXCLUDE_FROM_ALL) # Suppress stringop-overflow warnings for TBB build if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") target_compile_options(tbb PRIVATE $<$:-Wno-stringop-overflow> ) endif() ELSE() SET(ZE_RAYTRACING_TBB_HEADER_DIR ${tbb_static_SOURCE_DIR}/include) ENDIF() endif() ENDIF() MARK_AS_ADVANCED(FETCHCONTENT_BASE_DIR) MARK_AS_ADVANCED(FETCHCONTENT_FULLY_DISCONNECTED) MARK_AS_ADVANCED(FETCHCONTENT_QUIET) MARK_AS_ADVANCED(FETCHCONTENT_SOURCE_DIR_TBB_STATIC) MARK_AS_ADVANCED(FETCHCONTENT_UPDATES_DISCONNECTED) MARK_AS_ADVANCED(FETCHCONTENT_UPDATES_DISCONNECTED_TBB_STATIC) MARK_AS_ADVANCED(TBB4PY_BUILD) MARK_AS_ADVANCED(TBBMALLOC_BUILD) MARK_AS_ADVANCED(TBB_BUILD) MARK_AS_ADVANCED(TBB_CPF) MARK_AS_ADVANCED(TBB_DISABLE_HWLOC_AUTOMATIC_SEARCH) MARK_AS_ADVANCED(TBB_ENABLE_IPO) MARK_AS_ADVANCED(TBB_EXAMPLES) MARK_AS_ADVANCED(TBB_FIND_PACKAGE) MARK_AS_ADVANCED(TBB_INSTALL_VARS) MARK_AS_ADVANCED(TBB_NO_APPCONTAINER) MARK_AS_ADVANCED(TBB_SANITIZE) MARK_AS_ADVANCED(TBB_STRICT) MARK_AS_ADVANCED(TBB_TEST) MARK_AS_ADVANCED(TBB_TEST_SPEC) MARK_AS_ADVANCED(TBB_VALGRIND_MEMCHECK) MARK_AS_ADVANCED(TBB_WINDOWS_DRIVER) level-zero-raytracing-support-1.2.3/cmake/gnu.cmake000066400000000000000000000045471514453371700223530ustar00rootroot00000000000000## Copyright 2009-2021 Intel Corporation ## SPDX-License-Identifier: Apache-2.0 SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") # enables most warnings SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wformat -Wformat-security") # enables string format vulnerability warnings SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-class-memaccess") # disables clearing an object of type ‘XXX’ with no trivial copy-assignment; use assignment or value-initialization instead SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-strict-overflow") # assume that signed overflow occurs SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-delete-null-pointer-checks") # keep all checks for NULL pointers 
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fwrapv") # this option instructs the compiler to assume that signed arithmetic overflow warps around. SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsigned-char") # treat char as signed on all processors, including ARM SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIE") # enables support for more secure position independent execution SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") # generate position independent code suitable for shared libraries SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-strict-aliasing") # disables strict aliasing rules SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_FORTIFY_SOURCE=2") # perform extra security checks for some standard library calls SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstack-protector") # protects against return address overrides SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-undefined") # issues link error for undefined symbols in shared library SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -z noexecstack") # we do not need an executable stack SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -z relro -z now") # re-arranges data sections to increase security SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -z noexecstack") # we do not need an executable stack SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -z relro -z now") # re-arranges data sections to increase security SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pie") # enables position independent execution for executable level-zero-raytracing-support-1.2.3/cmake/intelllvm.cmake000066400000000000000000000046471514453371700235710ustar00rootroot00000000000000## Copyright 2009-2021 Intel Corporation ## SPDX-License-Identifier: Apache-2.0 SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") # enables most warnings SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wformat -Wformat-security") # enables string format vulnerability warnings #SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} 
-Wno-class-memaccess") # disables clearing an object of type ‘XXX’ with no trivial copy-assignment; use assignment or value-initialization instead #SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-strict-overflow") # assume that signed overflow occurs SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-delete-null-pointer-checks") # keep all checks for NULL pointers SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fwrapv") # this option instructs the compiler to assume that signed arithmetic overflow warps around. SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsigned-char") # treat char as signed on all processors, including ARM IF (NOT WIN32) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIE") # enables support for more secure position independent execution SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") # generate position independent code suitable for shared libraries ENDIF() SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-strict-aliasing") # disables strict aliasing rules SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_FORTIFY_SOURCE=2") # perform extra security checks for some standard library calls SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstack-protector") # protects against return address overrides IF (NOT WIN32) SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-undefined") # issues link error for undefined symbols in shared library SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -z noexecstack") # we do not need an executable stack SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -z relro -z now") # re-arranges data sections to increase security SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -z noexecstack") # we do not need an executable stack SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -z relro -z now") # re-arranges data sections to increase security SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pie") # enables position independent execution for executable ENDIF() 
level-zero-raytracing-support-1.2.3/cmake/msvc.cmake000066400000000000000000000014371514453371700225250ustar00rootroot00000000000000## Copyright 2009-2021 Intel Corporation ## SPDX-License-Identifier: Apache-2.0 SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GS") # protects against return address overrides SET(SECURE_LINKER_FLAGS "") SET(SECURE_LINKER_FLAGS "${SECURE_LINKER_FLAGS} /NXCompat") # compatible with data execution prevention (on by default) SET(SECURE_LINKER_FLAGS "${SECURE_LINKER_FLAGS} /DynamicBase") # random rebase of executable at load time IF (CMAKE_SIZEOF_VOID_P EQUAL 4) SET(SECURE_LINKER_FLAGS "${SECURE_LINKER_FLAGS} /SafeSEH") # invoke known exception handlers (Win32 only, x64 exception handlers are safe by design) ENDIF() SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${SECURE_LINKER_FLAGS}") SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${SECURE_LINKER_FLAGS}") level-zero-raytracing-support-1.2.3/cmake/package.cmake000066400000000000000000000050171514453371700231460ustar00rootroot00000000000000## Copyright 2009-2021 Intel Corporation ## SPDX-License-Identifier: Apache-2.0 INCLUDE(GNUInstallDirs) ############################################################## # Install Documentation ############################################################## IF (WIN32) INSTALL(FILES "${PROJECT_SOURCE_DIR}/LICENSE.txt" DESTINATION doc COMPONENT lib) INSTALL(FILES "${PROJECT_SOURCE_DIR}/third-party-programs.txt" DESTINATION doc COMPONENT lib) INSTALL(FILES "${PROJECT_SOURCE_DIR}/third-party-programs-TBB.txt" DESTINATION doc COMPONENT lib) # INSTALL(FILES "${PROJECT_SOURCE_DIR}/CHANGELOG.md" DESTINATION doc COMPONENT lib) ELSE() # Linux package builds include these separately ENDIF() ############################################################## # CPack specific stuff ############################################################## SET(CPACK_PACKAGE_NAME "oneAPI Level Zero Ray Tracing Support") IF (NOT DEFINED CPACK_PACKAGE_FILE_NAME) 
SET(CPACK_PACKAGE_FILE_NAME "intel-level-zero-gpu-raytracing-test") ENDIF() SET(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}_${ZE_RAYTRACING_VERSION}") SET(CPACK_STRIP_FILES TRUE) SET(CPACK_PACKAGE_VERSION_MAJOR ${ZE_RAYTRACING_VERSION_MAJOR}) SET(CPACK_PACKAGE_VERSION_MINOR ${ZE_RAYTRACING_VERSION_MINOR}) SET(CPACK_PACKAGE_VERSION_PATCH ${ZE_RAYTRACING_VERSION_PATCH}) SET(CPACK_PACKAGE_VERSION ${ZE_RAYTRACING_VERSION}) SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Support library for Level Zero ray tracing extension.") SET(CPACK_PACKAGE_VENDOR "Intel Corporation") SET(CPACK_PACKAGE_CONTACT embree_support@intel.com) #SET(CPACK_MONOLITHIC_INSTALL 0) #SET(CPACK_COMPONENTS_GROUPING ONE_PER_GROUP) #SET(CPACK_COMPONENTS_GROUPING IGNORE) SET(CPACK_COMPONENT_LIB_DISPLAY_NAME "Library") SET(CPACK_COMPONENT_LIB_DESCRIPTION "Library") SET(CPACK_COMPONENT_LIB_GROUP LIB) SET(CPACK_COMPONENT_DEVEL_DISPLAY_NAME "Development") SET(CPACK_COMPONENT_DEVEL_DESCRIPTION "Development") SET(CPACK_COMPONENT_DEVEL_GROUP DEVEL) SET(CPACK_COMPONENT_TEST_DISPLAY_NAME "Tests") SET(CPACK_COMPONENT_TEST_DESCRIPTION "Tests") SET(CPACK_COMPONENT_TEST_GROUP TEST) SET(CPACK_COMPONENTS_ALL LIB DEVEL TEST) # Windows specific settings IF(WIN32) SET(CPACK_GENERATOR ZIP) SET(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}.x64.windows") # MacOSX specific settings ELSEIF(APPLE) SET(CPACK_GENERATOR ZIP) SET(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}.x86_64.macosx") # Linux specific settings ELSE() SET(CPACK_GENERATOR TGZ) SET(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}.x86_64.linux") ENDIF() 
level-zero-raytracing-support-1.2.3/external/000077500000000000000000000000001514453371700213105ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/000077500000000000000000000000001514453371700220575ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/000077500000000000000000000000001514453371700231335ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/LICENSE.txt000066400000000000000000000261351514453371700247650ustar00rootroot00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. 
"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/000077500000000000000000000000001514453371700245565ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/000077500000000000000000000000001514453371700260315ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb.h000066400000000000000000000052161514453371700267550ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_tbb_H #define __TBB_tbb_H /** This header bulk-includes declarations or definitions of all the functionality provided by TBB (save for tbbmalloc and 3rd party dependent headers). If you use only a few TBB constructs, consider including specific headers only. Any header listed below can be included independently of others. 
**/ #include "oneapi/tbb/blocked_range.h" #include "oneapi/tbb/blocked_range2d.h" #include "oneapi/tbb/blocked_range3d.h" #if TBB_PREVIEW_BLOCKED_RANGE_ND #include "tbb/blocked_rangeNd.h" #endif #include "oneapi/tbb/cache_aligned_allocator.h" #include "oneapi/tbb/combinable.h" #include "oneapi/tbb/concurrent_hash_map.h" #if TBB_PREVIEW_CONCURRENT_LRU_CACHE #include "tbb/concurrent_lru_cache.h" #endif #include "oneapi/tbb/collaborative_call_once.h" #include "oneapi/tbb/concurrent_priority_queue.h" #include "oneapi/tbb/concurrent_queue.h" #include "oneapi/tbb/concurrent_unordered_map.h" #include "oneapi/tbb/concurrent_unordered_set.h" #include "oneapi/tbb/concurrent_map.h" #include "oneapi/tbb/concurrent_set.h" #include "oneapi/tbb/concurrent_vector.h" #include "oneapi/tbb/enumerable_thread_specific.h" #include "oneapi/tbb/flow_graph.h" #include "oneapi/tbb/global_control.h" #include "oneapi/tbb/info.h" #include "oneapi/tbb/null_mutex.h" #include "oneapi/tbb/null_rw_mutex.h" #include "oneapi/tbb/parallel_for.h" #include "oneapi/tbb/parallel_for_each.h" #include "oneapi/tbb/parallel_invoke.h" #include "oneapi/tbb/parallel_pipeline.h" #include "oneapi/tbb/parallel_reduce.h" #include "oneapi/tbb/parallel_scan.h" #include "oneapi/tbb/parallel_sort.h" #include "oneapi/tbb/partitioner.h" #include "oneapi/tbb/queuing_mutex.h" #include "oneapi/tbb/queuing_rw_mutex.h" #include "oneapi/tbb/spin_mutex.h" #include "oneapi/tbb/spin_rw_mutex.h" #include "oneapi/tbb/task.h" #include "oneapi/tbb/task_arena.h" #include "oneapi/tbb/task_group.h" #include "oneapi/tbb/task_scheduler_observer.h" #include "oneapi/tbb/tbb_allocator.h" #include "oneapi/tbb/tick_count.h" #include "oneapi/tbb/version.h" #endif /* __TBB_tbb_H */ 
level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/000077500000000000000000000000001514453371700266005ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/blocked_range.h000066400000000000000000000144651514453371700315420ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_blocked_range_H #define __TBB_blocked_range_H #include #include "detail/_range_common.h" #include "detail/_namespace_injection.h" #include "version.h" namespace tbb { namespace detail { namespace d1 { /** \page range_req Requirements on range concept Class \c R implementing the concept of range must define: - \code R::R( const R& ); \endcode Copy constructor - \code R::~R(); \endcode Destructor - \code bool R::is_divisible() const; \endcode True if range can be partitioned into two subranges - \code bool R::empty() const; \endcode True if range is empty - \code R::R( R& r, split ); \endcode Split range \c r into two subranges. **/ //! A range over which to iterate. /** @ingroup algorithms */ template __TBB_requires(blocked_range_value) class blocked_range { public: //! Type of a value /** Called a const_iterator for sake of algorithms that need to treat a blocked_range as an STL container. */ using const_iterator = Value; //! Type for size of a range using size_type = std::size_t; //! Construct range over half-open interval [begin,end), with the given grainsize. 
blocked_range( Value begin_, Value end_, size_type grainsize_=1 ) : my_end(end_), my_begin(begin_), my_grainsize(grainsize_) { __TBB_ASSERT( my_grainsize>0, "grainsize must be positive" ); } //! Beginning of range. const_iterator begin() const { return my_begin; } //! One past last value in range. const_iterator end() const { return my_end; } //! Size of the range /** Unspecified if end() __TBB_requires(blocked_range_value && blocked_range_value) friend class blocked_range2d; template __TBB_requires(blocked_range_value && blocked_range_value && blocked_range_value) friend class blocked_range3d; template __TBB_requires(blocked_range_value) friend class blocked_rangeNd_impl; }; } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::blocked_range; // Split types using detail::split; using detail::proportional_split; } // namespace v1 } // namespace tbb #endif /* __TBB_blocked_range_H */ level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/blocked_range2d.h000066400000000000000000000064511514453371700317640ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_blocked_range2d_H #define __TBB_blocked_range2d_H #include #include "detail/_config.h" #include "detail/_namespace_injection.h" #include "detail/_range_common.h" #include "blocked_range.h" namespace tbb { namespace detail { namespace d1 { //! A 2-dimensional range that models the Range concept. 
/** @ingroup algorithms */ template __TBB_requires(blocked_range_value && blocked_range_value) class blocked_range2d { public: //! Type for size of an iteration range using row_range_type = blocked_range; using col_range_type = blocked_range; private: row_range_type my_rows; col_range_type my_cols; public: blocked_range2d( RowValue row_begin, RowValue row_end, typename row_range_type::size_type row_grainsize, ColValue col_begin, ColValue col_end, typename col_range_type::size_type col_grainsize ) : my_rows(row_begin,row_end,row_grainsize), my_cols(col_begin,col_end,col_grainsize) {} blocked_range2d( RowValue row_begin, RowValue row_end, ColValue col_begin, ColValue col_end ) : my_rows(row_begin,row_end), my_cols(col_begin,col_end) {} //! True if range is empty bool empty() const { // Range is empty if at least one dimension is empty. return my_rows.empty() || my_cols.empty(); } //! True if range is divisible into two pieces. bool is_divisible() const { return my_rows.is_divisible() || my_cols.is_divisible(); } blocked_range2d( blocked_range2d& r, split ) : my_rows(r.my_rows), my_cols(r.my_cols) { split split_obj; do_split(r, split_obj); } blocked_range2d( blocked_range2d& r, proportional_split& proportion ) : my_rows(r.my_rows), my_cols(r.my_cols) { do_split(r, proportion); } //! The rows of the iteration space const row_range_type& rows() const { return my_rows; } //! 
The columns of the iteration space const col_range_type& cols() const { return my_cols; } private: template void do_split( blocked_range2d& r, Split& split_obj ) { if ( my_rows.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_rows.grainsize()) ) { my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj); } else { my_rows.my_begin = row_range_type::do_split(r.my_rows, split_obj); } } }; } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::blocked_range2d; } // namespace v1 } // namespace tbb #endif /* __TBB_blocked_range2d_H */ level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/blocked_range3d.h000066400000000000000000000105471514453371700317660ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_blocked_range3d_H #define __TBB_blocked_range3d_H #include #include "detail/_config.h" #include "detail/_namespace_injection.h" #include "blocked_range.h" namespace tbb { namespace detail { namespace d1 { //! A 3-dimensional range that models the Range concept. /** @ingroup algorithms */ template __TBB_requires(blocked_range_value && blocked_range_value && blocked_range_value) class blocked_range3d { public: //! 
Type for size of an iteration range using page_range_type = blocked_range; using row_range_type = blocked_range; using col_range_type = blocked_range; private: page_range_type my_pages; row_range_type my_rows; col_range_type my_cols; public: blocked_range3d( PageValue page_begin, PageValue page_end, RowValue row_begin, RowValue row_end, ColValue col_begin, ColValue col_end ) : my_pages(page_begin,page_end), my_rows(row_begin,row_end), my_cols(col_begin,col_end) {} blocked_range3d( PageValue page_begin, PageValue page_end, typename page_range_type::size_type page_grainsize, RowValue row_begin, RowValue row_end, typename row_range_type::size_type row_grainsize, ColValue col_begin, ColValue col_end, typename col_range_type::size_type col_grainsize ) : my_pages(page_begin,page_end,page_grainsize), my_rows(row_begin,row_end,row_grainsize), my_cols(col_begin,col_end,col_grainsize) {} //! True if range is empty bool empty() const { // Range is empty if at least one dimension is empty. return my_pages.empty() || my_rows.empty() || my_cols.empty(); } //! True if range is divisible into two pieces. bool is_divisible() const { return my_pages.is_divisible() || my_rows.is_divisible() || my_cols.is_divisible(); } blocked_range3d( blocked_range3d& r, split split_obj ) : my_pages(r.my_pages), my_rows(r.my_rows), my_cols(r.my_cols) { do_split(r, split_obj); } blocked_range3d( blocked_range3d& r, proportional_split& proportion ) : my_pages(r.my_pages), my_rows(r.my_rows), my_cols(r.my_cols) { do_split(r, proportion); } //! The pages of the iteration space const page_range_type& pages() const { return my_pages; } //! The rows of the iteration space const row_range_type& rows() const { return my_rows; } //! 
The columns of the iteration space const col_range_type& cols() const { return my_cols; } private: template void do_split( blocked_range3d& r, Split& split_obj) { if ( my_pages.size()*double(my_rows.grainsize()) < my_rows.size()*double(my_pages.grainsize()) ) { if ( my_rows.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_rows.grainsize()) ) { my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj); } else { my_rows.my_begin = row_range_type::do_split(r.my_rows, split_obj); } } else { if ( my_pages.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_pages.grainsize()) ) { my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj); } else { my_pages.my_begin = page_range_type::do_split(r.my_pages, split_obj); } } } }; } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::blocked_range3d; } // namespace v1 } // namespace tbb #endif /* __TBB_blocked_range3d_H */ level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/blocked_rangeNd.h000066400000000000000000000126041514453371700320150ustar00rootroot00000000000000/* Copyright (c) 2017-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #ifndef __TBB_blocked_rangeNd_H #define __TBB_blocked_rangeNd_H #if !TBB_PREVIEW_BLOCKED_RANGE_ND #error Set TBB_PREVIEW_BLOCKED_RANGE_ND to include blocked_rangeNd.h #endif #include // std::any_of #include #include #include // std::is_same, std::enable_if #include "detail/_config.h" #include "detail/_template_helpers.h" // index_sequence, make_index_sequence #include "detail/_range_common.h" #include "blocked_range.h" namespace tbb { namespace detail { namespace d1 { /* The blocked_rangeNd_impl uses make_index_sequence to automatically generate a ctor with exactly N arguments of the type tbb::blocked_range. Such ctor provides an opportunity to use braced-init-list parameters to initialize each dimension. Use of parameters, whose representation is a braced-init-list, but they're not std::initializer_list or a reference to one, produces a non-deduced context within template argument deduction. NOTE: blocked_rangeNd must be exactly a templated alias to the blocked_rangeNd_impl (and not e.g. a derived class), otherwise it would need to declare its own ctor facing the same problem that the impl class solves. */ template> __TBB_requires(blocked_range_value) class blocked_rangeNd_impl; template __TBB_requires(blocked_range_value) class blocked_rangeNd_impl> { public: //! Type of a value. using value_type = Value; private: //! Helper type to construct range with N tbb::blocked_range objects. template using dim_type_helper = tbb::blocked_range; public: blocked_rangeNd_impl() = delete; //! Constructs N-dimensional range over N half-open intervals each represented as tbb::blocked_range. blocked_rangeNd_impl(const dim_type_helper&... args) : my_dims{ {args...} } {} //! Dimensionality of a range. static constexpr unsigned int ndims() { return N; } //! Range in certain dimension. 
const tbb::blocked_range& dim(unsigned int dimension) const { __TBB_ASSERT(dimension < N, "out of bound"); return my_dims[dimension]; } //------------------------------------------------------------------------ // Methods that implement Range concept //------------------------------------------------------------------------ //! True if at least one dimension is empty. bool empty() const { return std::any_of(my_dims.begin(), my_dims.end(), [](const tbb::blocked_range& d) { return d.empty(); }); } //! True if at least one dimension is divisible. bool is_divisible() const { return std::any_of(my_dims.begin(), my_dims.end(), [](const tbb::blocked_range& d) { return d.is_divisible(); }); } blocked_rangeNd_impl(blocked_rangeNd_impl& r, proportional_split proportion) : my_dims(r.my_dims) { do_split(r, proportion); } blocked_rangeNd_impl(blocked_rangeNd_impl& r, split proportion) : my_dims(r.my_dims) { do_split(r, proportion); } private: static_assert(N != 0, "zero dimensional blocked_rangeNd can't be constructed"); //! Ranges in each dimension. 
std::array, N> my_dims; template void do_split(blocked_rangeNd_impl& r, split_type proportion) { static_assert((std::is_same::value || std::is_same::value), "type of split object is incorrect"); __TBB_ASSERT(r.is_divisible(), "can't split not divisible range"); auto my_it = std::max_element(my_dims.begin(), my_dims.end(), [](const tbb::blocked_range& first, const tbb::blocked_range& second) { return (first.size() * second.grainsize() < second.size() * first.grainsize()); }); auto r_it = r.my_dims.begin() + (my_it - my_dims.begin()); my_it->my_begin = tbb::blocked_range::do_split(*r_it, proportion); // (!(my_it->my_begin < r_it->my_end) && !(r_it->my_end < my_it->my_begin)) equals to // (my_it->my_begin == r_it->my_end), but we can't use operator== due to Value concept __TBB_ASSERT(!(my_it->my_begin < r_it->my_end) && !(r_it->my_end < my_it->my_begin), "blocked_range has been split incorrectly"); } }; template using blocked_rangeNd = blocked_rangeNd_impl; } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::blocked_rangeNd; } // namespace v1 } // namespace tbb #endif /* __TBB_blocked_rangeNd_H */ cache_aligned_allocator.h000066400000000000000000000153711514453371700334670ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #ifndef __TBB_cache_aligned_allocator_H #define __TBB_cache_aligned_allocator_H #include "detail/_utils.h" #include "detail/_namespace_injection.h" #include #include #if __TBB_CPP17_MEMORY_RESOURCE_PRESENT #include #endif namespace tbb { namespace detail { namespace r1 { TBB_EXPORT void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size); TBB_EXPORT void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p); TBB_EXPORT std::size_t __TBB_EXPORTED_FUNC cache_line_size(); } namespace d1 { template class cache_aligned_allocator { public: using value_type = T; using propagate_on_container_move_assignment = std::true_type; //! Always defined for TBB containers (supported since C++17 for std containers) using is_always_equal = std::true_type; cache_aligned_allocator() = default; template cache_aligned_allocator(const cache_aligned_allocator&) noexcept {} //! Allocate space for n objects, starting on a cache/sector line. __TBB_nodiscard T* allocate(std::size_t n) { return static_cast(r1::cache_aligned_allocate(n * sizeof(value_type))); } //! Free block of memory that starts on a cache line void deallocate(T* p, std::size_t) { r1::cache_aligned_deallocate(p); } //! Largest value for which method allocate might succeed. std::size_t max_size() const noexcept { return (~std::size_t(0) - r1::cache_line_size()) / sizeof(value_type); } #if TBB_ALLOCATOR_TRAITS_BROKEN using pointer = value_type*; using const_pointer = const value_type*; using reference = value_type&; using const_reference = const value_type&; using difference_type = std::ptrdiff_t; using size_type = std::size_t; template struct rebind { using other = cache_aligned_allocator; }; template void construct(U *p, Args&&... 
args) { ::new (p) U(std::forward(args)...); } void destroy(pointer p) { p->~value_type(); } pointer address(reference x) const { return &x; } const_pointer address(const_reference x) const { return &x; } #endif // TBB_ALLOCATOR_TRAITS_BROKEN }; #if TBB_ALLOCATOR_TRAITS_BROKEN template<> class cache_aligned_allocator { public: using pointer = void*; using const_pointer = const void*; using value_type = void; template struct rebind { using other = cache_aligned_allocator; }; }; #endif template bool operator==(const cache_aligned_allocator&, const cache_aligned_allocator&) noexcept { return true; } #if !__TBB_CPP20_COMPARISONS_PRESENT template bool operator!=(const cache_aligned_allocator&, const cache_aligned_allocator&) noexcept { return false; } #endif #if __TBB_CPP17_MEMORY_RESOURCE_PRESENT //! C++17 memory resource wrapper to ensure cache line size alignment class cache_aligned_resource : public std::pmr::memory_resource { public: cache_aligned_resource() : cache_aligned_resource(std::pmr::get_default_resource()) {} explicit cache_aligned_resource(std::pmr::memory_resource* upstream) : m_upstream(upstream) {} std::pmr::memory_resource* upstream_resource() const { return m_upstream; } private: //! We don't know what memory resource set. 
Use padding to guarantee alignment void* do_allocate(std::size_t bytes, std::size_t alignment) override { // TODO: make it common with tbb_allocator.cpp std::size_t cache_line_alignment = correct_alignment(alignment); std::size_t space = correct_size(bytes) + cache_line_alignment; std::uintptr_t base = reinterpret_cast(m_upstream->allocate(space)); __TBB_ASSERT(base != 0, "Upstream resource returned NULL."); // Round up to the next cache line (align the base address) std::uintptr_t result = (base + cache_line_alignment) & ~(cache_line_alignment - 1); __TBB_ASSERT((result - base) >= sizeof(std::uintptr_t), "Can`t store a base pointer to the header"); __TBB_ASSERT(space - (result - base) >= bytes, "Not enough space for the storage"); // Record where block actually starts. (reinterpret_cast(result))[-1] = base; return reinterpret_cast(result); } void do_deallocate(void* ptr, std::size_t bytes, std::size_t alignment) override { if (ptr) { // Recover where block actually starts std::uintptr_t base = (reinterpret_cast(ptr))[-1]; m_upstream->deallocate(reinterpret_cast(base), correct_size(bytes) + correct_alignment(alignment)); } } bool do_is_equal(const std::pmr::memory_resource& other) const noexcept override { if (this == &other) { return true; } #if __TBB_USE_OPTIONAL_RTTI const cache_aligned_resource* other_res = dynamic_cast(&other); return other_res && (upstream_resource() == other_res->upstream_resource()); #else return false; #endif } std::size_t correct_alignment(std::size_t alignment) { __TBB_ASSERT(tbb::detail::is_power_of_two(alignment), "Alignment is not a power of 2"); #if __TBB_CPP17_HW_INTERFERENCE_SIZE_PRESENT std::size_t cache_line_size = std::hardware_destructive_interference_size; #else std::size_t cache_line_size = r1::cache_line_size(); #endif return alignment < cache_line_size ? cache_line_size : alignment; } std::size_t correct_size(std::size_t bytes) { // To handle the case, when small size requested. 
There could be not // enough space to store the original pointer. return bytes < sizeof(std::uintptr_t) ? sizeof(std::uintptr_t) : bytes; } std::pmr::memory_resource* m_upstream; }; #endif // __TBB_CPP17_MEMORY_RESOURCE_PRESENT } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::cache_aligned_allocator; #if __TBB_CPP17_MEMORY_RESOURCE_PRESENT using detail::d1::cache_aligned_resource; #endif } // namespace v1 } // namespace tbb #endif /* __TBB_cache_aligned_allocator_H */ collaborative_call_once.h000066400000000000000000000213601514453371700335210ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/* Copyright (c) 2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #ifndef __TBB_collaborative_call_once_H #define __TBB_collaborative_call_once_H #include "task_arena.h" #include "task_group.h" #include namespace tbb { namespace detail { namespace d1 { #if _MSC_VER && !defined(__INTEL_COMPILER) // Suppress warning: structure was padded due to alignment specifier #pragma warning (push) #pragma warning (disable: 4324) #endif constexpr std::uintptr_t collaborative_once_max_references = max_nfs_size; constexpr std::uintptr_t collaborative_once_references_mask = collaborative_once_max_references-1; class alignas(max_nfs_size) collaborative_once_runner : no_copy { struct storage_t { task_arena m_arena{ task_arena::attach{} }; wait_context m_wait_context{1}; }; std::atomic m_ref_count{0}; std::atomic m_is_ready{false}; // Storage with task_arena and wait_context must be initialized only by winner thread union { storage_t m_storage; }; template void isolated_execute(Fn f) { auto func = [f] { f(); // delegate_base requires bool returning functor while isolate_within_arena ignores the result return true; }; delegated_function delegate(func); r1::isolate_within_arena(delegate, reinterpret_cast(this)); } public: class lifetime_guard : no_copy { collaborative_once_runner& m_runner; public: lifetime_guard(collaborative_once_runner& r) : m_runner(r) { m_runner.m_ref_count++; } ~lifetime_guard() { m_runner.m_ref_count--; } }; collaborative_once_runner() {} ~collaborative_once_runner() { spin_wait_until_eq(m_ref_count, 0, std::memory_order_acquire); if (m_is_ready.load(std::memory_order_relaxed)) { m_storage.~storage_t(); } } std::uintptr_t to_bits() { return reinterpret_cast(this); } static collaborative_once_runner* from_bits(std::uintptr_t bits) { __TBB_ASSERT( (bits & collaborative_once_references_mask) == 0, "invalid pointer, last log2(max_nfs_size) bits must be zero" ); return reinterpret_cast(bits); } template void run_once(F&& f) { __TBB_ASSERT(!m_is_ready.load(std::memory_order_relaxed), "storage with task_arena and wait_context is 
already initialized"); // Initialize internal state new(&m_storage) storage_t(); m_storage.m_arena.execute([&] { isolated_execute([&] { task_group_context context{ task_group_context::bound, task_group_context::default_traits | task_group_context::concurrent_wait }; function_stack_task t{ std::forward(f), m_storage.m_wait_context }; // Set the ready flag after entering the execute body to prevent // moonlighting threads from occupying all slots inside the arena. m_is_ready.store(true, std::memory_order_release); execute_and_wait(t, context, m_storage.m_wait_context, context); }); }); } void assist() noexcept { // Do not join the arena until the winner thread takes the slot spin_wait_while_eq(m_is_ready, false); m_storage.m_arena.execute([&] { isolated_execute([&] { // We do not want to get an exception from user functor on moonlighting threads. // The exception is handled with the winner thread task_group_context stub_context; wait(m_storage.m_wait_context, stub_context); }); }); } }; class collaborative_once_flag : no_copy { enum state : std::uintptr_t { uninitialized, done, #if TBB_USE_ASSERT dead #endif }; std::atomic m_state{ state::uninitialized }; template friend void collaborative_call_once(collaborative_once_flag& flag, Fn&& f, Args&&... args); void set_completion_state(std::uintptr_t runner_bits, std::uintptr_t desired) { std::uintptr_t expected = runner_bits; do { expected = runner_bits; // Possible inefficiency: when we start waiting, // some moonlighting threads might continue coming that will prolong our waiting. // Fortunately, there are limited number of threads on the system so wait time is limited. 
spin_wait_until_eq(m_state, expected); } while (!m_state.compare_exchange_strong(expected, desired)); } template void do_collaborative_call_once(Fn&& f) { std::uintptr_t expected = m_state.load(std::memory_order_acquire); collaborative_once_runner runner; do { if (expected == state::uninitialized && m_state.compare_exchange_strong(expected, runner.to_bits())) { // Winner thread runner.run_once([&] { try_call([&] { std::forward(f)(); }).on_exception([&] { // Reset the state to uninitialized to allow other threads to try initialization again set_completion_state(runner.to_bits(), state::uninitialized); }); // We successfully executed functor set_completion_state(runner.to_bits(), state::done); }); break; } else { // Moonlighting thread: we need to add a reference to the state to prolong runner lifetime. // However, the maximum number of references are limited with runner alignment. // So, we use CAS loop and spin_wait to guarantee that references never exceed "max_value". do { auto max_value = expected | collaborative_once_references_mask; expected = spin_wait_while_eq(m_state, max_value); // "expected > state::done" prevents storing values, when state is uninitialized or done } while (expected > state::done && !m_state.compare_exchange_strong(expected, expected + 1)); if (auto shared_runner = collaborative_once_runner::from_bits(expected & ~collaborative_once_references_mask)) { collaborative_once_runner::lifetime_guard guard{*shared_runner}; m_state.fetch_sub(1); // The moonlighting threads are not expected to handle exceptions from user functor. // Therefore, no exception is expected from assist(). 
shared_runner->assist(); } } __TBB_ASSERT(m_state.load(std::memory_order_relaxed) != state::dead, "collaborative_once_flag has been prematurely destroyed"); } while (expected != state::done); } #if TBB_USE_ASSERT public: ~collaborative_once_flag() { m_state.store(state::dead, std::memory_order_relaxed); } #endif }; template void collaborative_call_once(collaborative_once_flag& flag, Fn&& fn, Args&&... args) { __TBB_ASSERT(flag.m_state.load(std::memory_order_relaxed) != collaborative_once_flag::dead, "collaborative_once_flag has been prematurely destroyed"); if (flag.m_state.load(std::memory_order_acquire) != collaborative_once_flag::done) { #if __TBB_GCC_PARAMETER_PACK_IN_LAMBDAS_BROKEN // Using stored_pack to suppress bug in GCC 4.8 // with parameter pack expansion in lambda auto stored_pack = save_pack(std::forward(args)...); auto func = [&] { call(std::forward(fn), std::move(stored_pack)); }; #else auto func = [&] { fn(std::forward(args)...); }; #endif flag.do_collaborative_call_once(func); } } #if _MSC_VER && !defined(__INTEL_COMPILER) #pragma warning (pop) // 4324 warning #endif } // namespace d1 } // namespace detail using detail::d1::collaborative_call_once; using detail::d1::collaborative_once_flag; } // namespace tbb #endif // __TBB_collaborative_call_once_H level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/combinable.h000066400000000000000000000036611514453371700310520ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_combinable_H #define __TBB_combinable_H #include "detail/_namespace_injection.h" #include "enumerable_thread_specific.h" #include "cache_aligned_allocator.h" namespace tbb { namespace detail { namespace d1 { /** \name combinable **/ //@{ //! Thread-local storage with optional reduction /** @ingroup containers */ template class combinable { using my_alloc = typename tbb::cache_aligned_allocator; using my_ets_type = typename tbb::enumerable_thread_specific; my_ets_type my_ets; public: combinable() = default; template explicit combinable(Finit _finit) : my_ets(_finit) { } void clear() { my_ets.clear(); } T& local() { return my_ets.local(); } T& local(bool& exists) { return my_ets.local(exists); } // combine_func_t has signature T(T,T) or T(const T&, const T&) template T combine(CombineFunc f_combine) { return my_ets.combine(f_combine); } // combine_func_t has signature void(T) or void(const T&) template void combine_each(CombineFunc f_combine) { my_ets.combine_each(f_combine); } }; } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::combinable; } // inline namespace v1 } // namespace tbb #endif /* __TBB_combinable_H */ level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/concurrent_hash_map.h000066400000000000000000002134711514453371700330030ustar00rootroot00000000000000/* Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_concurrent_hash_map_H #define __TBB_concurrent_hash_map_H #include "detail/_namespace_injection.h" #include "detail/_utils.h" #include "detail/_assert.h" #include "detail/_allocator_traits.h" #include "detail/_containers_helpers.h" #include "detail/_template_helpers.h" #include "detail/_hash_compare.h" #include "detail/_range_common.h" #include "tbb_allocator.h" #include "spin_rw_mutex.h" #include #include #include #include #include // Need std::pair #include // Need std::memset namespace tbb { namespace detail { namespace d2 { #if __TBB_PREVIEW_CONCURRENT_HASH_MAP_EXTENSIONS && __TBB_CPP20_CONCEPTS_PRESENT template concept ch_map_rw_scoped_lockable = rw_scoped_lockable && requires(const typename Mutex::scoped_lock& sl) { { sl.is_writer() } -> std::convertible_to; }; #endif template struct hash_map_node_base : no_copy { using mutex_type = MutexType; // Scoped lock type for mutex using scoped_type = typename MutexType::scoped_lock; // Next node in chain hash_map_node_base* next; mutex_type mutex; }; // Incompleteness flag value static void* const rehash_req_flag = reinterpret_cast(std::size_t(3)); // Rehashed empty bucket flag static void* const empty_rehashed_flag = reinterpret_cast(std::size_t(0)); template bool rehash_required( hash_map_node_base* node_ptr ) { return reinterpret_cast(node_ptr) == rehash_req_flag; } #if TBB_USE_ASSERT template bool empty_rehashed( hash_map_node_base* node_ptr ) { return reinterpret_cast(node_ptr) == empty_rehashed_flag; } #endif // base class of concurrent_hash_map template class hash_map_base { public: using size_type = std::size_t; using hashcode_type = std::size_t; using segment_index_type = std::size_t; using node_base = hash_map_node_base; struct bucket : no_copy { using mutex_type = MutexType; using scoped_type = typename mutex_type::scoped_lock; bucket() : node_list(nullptr) {} bucket( node_base* ptr ) : 
node_list(ptr) {} mutex_type mutex; std::atomic node_list; }; using allocator_type = Allocator; using allocator_traits_type = tbb::detail::allocator_traits; using bucket_allocator_type = typename allocator_traits_type::template rebind_alloc; using bucket_allocator_traits = tbb::detail::allocator_traits; // Count of segments in the first block static constexpr size_type embedded_block = 1; // Count of segments in the first block static constexpr size_type embedded_buckets = 1 << embedded_block; // Count of segments in the first block static constexpr size_type first_block = 8; //including embedded_block. perfect with bucket size 16, so the allocations are power of 4096 // Size of a pointer / table size static constexpr size_type pointers_per_table = sizeof(segment_index_type) * 8; // one segment per bit using segment_ptr_type = bucket*; using atomic_segment_type = std::atomic; using segments_table_type = atomic_segment_type[pointers_per_table]; hash_map_base( const allocator_type& alloc ) : my_allocator(alloc), my_mask(embedded_buckets - 1), my_size(0) { for (size_type i = 0; i != embedded_buckets; ++i) { my_embedded_segment[i].node_list.store(nullptr, std::memory_order_relaxed); } for (size_type segment_index = 0; segment_index < pointers_per_table; ++segment_index) { auto argument = segment_index < embedded_block ? 
my_embedded_segment + segment_base(segment_index) : nullptr; my_table[segment_index].store(argument, std::memory_order_relaxed); } __TBB_ASSERT( embedded_block <= first_block, "The first block number must include embedded blocks"); } // segment index of given index in the array static segment_index_type segment_index_of( size_type index ) { return segment_index_type(tbb::detail::log2( index|1 )); } // the first array index of given segment static segment_index_type segment_base( segment_index_type k ) { return (segment_index_type(1) << k & ~segment_index_type(1)); } // segment size except for k == 0 static size_type segment_size( segment_index_type k ) { return size_type(1) << k; // fake value for k==0 } // true if ptr is valid pointer static bool is_valid( void* ptr ) { return reinterpret_cast(ptr) > uintptr_t(63); } template void init_buckets_impl( segment_ptr_type ptr, size_type sz, Args&&... args ) { for (size_type i = 0; i < sz; ++i) { bucket_allocator_traits::construct(my_allocator, ptr + i, std::forward(args)...); } } // Initialize buckets void init_buckets( segment_ptr_type ptr, size_type sz, bool is_initial ) { if (is_initial) { init_buckets_impl(ptr, sz); } else { init_buckets_impl(ptr, sz, reinterpret_cast(rehash_req_flag)); } } // Add node n to bucket b static void add_to_bucket( bucket* b, node_base* n ) { __TBB_ASSERT(!rehash_required(b->node_list.load(std::memory_order_relaxed)), nullptr); n->next = b->node_list.load(std::memory_order_relaxed); b->node_list.store(n, std::memory_order_relaxed); // its under lock and flag is set } const bucket_allocator_type& get_allocator() const { return my_allocator; } bucket_allocator_type& get_allocator() { return my_allocator; } // Enable segment void enable_segment( segment_index_type k, bool is_initial = false ) { __TBB_ASSERT( k, "Zero segment must be embedded" ); size_type sz; __TBB_ASSERT( !is_valid(my_table[k].load(std::memory_order_relaxed)), "Wrong concurrent assignment"); if (k >= first_block) { sz = 
segment_size(k); segment_ptr_type ptr = nullptr; try_call( [&] { ptr = bucket_allocator_traits::allocate(my_allocator, sz); } ).on_exception( [&] { my_table[k].store(nullptr, std::memory_order_relaxed); }); __TBB_ASSERT(ptr, nullptr); init_buckets(ptr, sz, is_initial); my_table[k].store(ptr, std::memory_order_release); sz <<= 1;// double it to get entire capacity of the container } else { // the first block __TBB_ASSERT( k == embedded_block, "Wrong segment index" ); sz = segment_size(first_block); segment_ptr_type ptr = nullptr; try_call( [&] { ptr = bucket_allocator_traits::allocate(my_allocator, sz - embedded_buckets); } ).on_exception( [&] { my_table[k].store(nullptr, std::memory_order_relaxed); }); __TBB_ASSERT(ptr, nullptr); init_buckets(ptr, sz - embedded_buckets, is_initial); ptr -= segment_base(embedded_block); for(segment_index_type i = embedded_block; i < first_block; i++) // calc the offsets my_table[i].store(ptr + segment_base(i), std::memory_order_release); } my_mask.store(sz-1, std::memory_order_release); } void delete_segment( segment_index_type s ) { segment_ptr_type buckets_ptr = my_table[s].load(std::memory_order_relaxed); size_type sz = segment_size( s ? 
s : 1 ); size_type deallocate_size = 0; if (s >= first_block) { // the first segment or the next deallocate_size = sz; } else if (s == embedded_block && embedded_block != first_block) { deallocate_size = segment_size(first_block) - embedded_buckets; } for (size_type i = 0; i < deallocate_size; ++i) { bucket_allocator_traits::destroy(my_allocator, buckets_ptr + i); } if (deallocate_size != 0) { bucket_allocator_traits::deallocate(my_allocator, buckets_ptr, deallocate_size); } if (s >= embedded_block) my_table[s].store(nullptr, std::memory_order_relaxed); } // Get bucket by (masked) hashcode bucket *get_bucket( hashcode_type h ) const noexcept { segment_index_type s = segment_index_of( h ); h -= segment_base(s); segment_ptr_type seg = my_table[s].load(std::memory_order_acquire); __TBB_ASSERT( is_valid(seg), "hashcode must be cut by valid mask for allocated segments" ); return &seg[h]; } // detail serial rehashing helper void mark_rehashed_levels( hashcode_type h ) noexcept { segment_index_type s = segment_index_of( h ); while (segment_ptr_type seg = my_table[++s].load(std::memory_order_relaxed)) if (rehash_required(seg[h].node_list.load(std::memory_order_relaxed))) { seg[h].node_list.store(reinterpret_cast(empty_rehashed_flag), std::memory_order_relaxed); mark_rehashed_levels( h + ((hashcode_type)1<node_list.load(std::memory_order_acquire))) { return true; } } return false; } // Insert a node and check for load factor. @return segment index to enable. 
segment_index_type insert_new_node( bucket *b, node_base *n, hashcode_type mask ) { size_type sz = ++my_size; // prefix form is to enforce allocation after the first item inserted add_to_bucket( b, n ); // check load factor if( sz >= mask ) { // TODO: add custom load_factor segment_index_type new_seg = tbb::detail::log2( mask+1 ); //optimized segment_index_of __TBB_ASSERT( is_valid(my_table[new_seg-1].load(std::memory_order_relaxed)), "new allocations must not publish new mask until segment has allocated"); static const segment_ptr_type is_allocating = segment_ptr_type(2);; segment_ptr_type disabled = nullptr; if (!(my_table[new_seg].load(std::memory_order_acquire)) && my_table[new_seg].compare_exchange_strong(disabled, is_allocating)) return new_seg; // The value must be processed } return 0; } // Prepare enough segments for number of buckets void reserve(size_type buckets) { if( !buckets-- ) return; bool is_initial = !my_size.load(std::memory_order_relaxed); for (size_type m = my_mask.load(std::memory_order_relaxed); buckets > m; m = my_mask.load(std::memory_order_relaxed)) { enable_segment( segment_index_of( m+1 ), is_initial ); } } // Swap hash_map_bases void internal_swap_content(hash_map_base &table) { using std::swap; swap_atomics_relaxed(my_mask, table.my_mask); swap_atomics_relaxed(my_size, table.my_size); for(size_type i = 0; i < embedded_buckets; i++) { auto temp = my_embedded_segment[i].node_list.load(std::memory_order_relaxed); my_embedded_segment[i].node_list.store(table.my_embedded_segment[i].node_list.load(std::memory_order_relaxed), std::memory_order_relaxed); table.my_embedded_segment[i].node_list.store(temp, std::memory_order_relaxed); } for(size_type i = embedded_block; i < pointers_per_table; i++) { auto temp = my_table[i].load(std::memory_order_relaxed); my_table[i].store(table.my_table[i].load(std::memory_order_relaxed), std::memory_order_relaxed); table.my_table[i].store(temp, std::memory_order_relaxed); } } void 
internal_move(hash_map_base&& other) { my_mask.store(other.my_mask.load(std::memory_order_relaxed), std::memory_order_relaxed); other.my_mask.store(embedded_buckets - 1, std::memory_order_relaxed); my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); other.my_size.store(0, std::memory_order_relaxed); for (size_type i = 0; i < embedded_buckets; ++i) { my_embedded_segment[i].node_list.store(other.my_embedded_segment[i].node_list, std::memory_order_relaxed); other.my_embedded_segment[i].node_list.store(nullptr, std::memory_order_relaxed); } for (size_type i = embedded_block; i < pointers_per_table; ++i) { my_table[i].store(other.my_table[i].load(std::memory_order_relaxed), std::memory_order_relaxed); other.my_table[i].store(nullptr, std::memory_order_relaxed); } } protected: bucket_allocator_type my_allocator; // Hash mask = sum of allocated segment sizes - 1 std::atomic my_mask; // Size of container in stored items std::atomic my_size; // It must be in separate cache line from my_mask due to performance effects // Zero segment bucket my_embedded_segment[embedded_buckets]; // Segment pointers table. Also prevents false sharing between my_mask and my_size segments_table_type my_table; }; template class hash_map_range; // Meets requirements of a forward iterator for STL // Value is either the T or const T type of the container. 
template class hash_map_iterator { using map_type = Container; using node = typename Container::node; using map_base = typename Container::base_type; using node_base = typename map_base::node_base; using bucket = typename map_base::bucket; public: using value_type = Value; using size_type = typename Container::size_type; using difference_type = typename Container::difference_type; using pointer = value_type*; using reference = value_type&; using iterator_category = std::forward_iterator_tag; // Construct undefined iterator hash_map_iterator(): my_map(), my_index(), my_bucket(), my_node() {} hash_map_iterator( const hash_map_iterator& other ) : my_map(other.my_map), my_index(other.my_index), my_bucket(other.my_bucket), my_node(other.my_node) {} hash_map_iterator& operator=( const hash_map_iterator& other ) { my_map = other.my_map; my_index = other.my_index; my_bucket = other.my_bucket; my_node = other.my_node; return *this; } Value& operator*() const { __TBB_ASSERT( map_base::is_valid(my_node), "iterator uninitialized or at end of container?" 
); return my_node->value(); } Value* operator->() const {return &operator*();} hash_map_iterator& operator++() { my_node = static_cast( my_node->next ); if( !my_node ) advance_to_next_bucket(); return *this; } // Post increment hash_map_iterator operator++(int) { hash_map_iterator old(*this); operator++(); return old; } private: template friend bool operator==( const hash_map_iterator& i, const hash_map_iterator& j ); template friend bool operator!=( const hash_map_iterator& i, const hash_map_iterator& j ); template friend ptrdiff_t operator-( const hash_map_iterator& i, const hash_map_iterator& j ); template friend class hash_map_iterator; template friend class hash_map_range; void advance_to_next_bucket() { // TODO?: refactor to iterator_base class size_t k = my_index+1; __TBB_ASSERT( my_bucket, "advancing an invalid iterator?"); while (k <= my_map->my_mask.load(std::memory_order_relaxed)) { // Following test uses 2's-complement wizardry if( k&(k-2) ) // not the beginning of a segment ++my_bucket; else my_bucket = my_map->get_bucket( k ); my_node = static_cast( my_bucket->node_list.load(std::memory_order_relaxed) ); if( map_base::is_valid(my_node) ) { my_index = k; return; } ++k; } my_bucket = 0; my_node = 0; my_index = k; // the end } template __TBB_requires(tbb::detail::hash_compare && ch_map_rw_scoped_lockable) #else > __TBB_requires(tbb::detail::hash_compare) #endif friend class concurrent_hash_map; hash_map_iterator( const Container &map, std::size_t index, const bucket *b, node_base *n ) : my_map(&map), my_index(index), my_bucket(b), my_node(static_cast(n)) { if( b && !map_base::is_valid(n) ) advance_to_next_bucket(); } // concurrent_hash_map over which we are iterating. 
const Container *my_map; // Index in hash table for current item size_t my_index; // Pointer to bucket const bucket* my_bucket; // Pointer to node that has current item node* my_node; }; template bool operator==( const hash_map_iterator& i, const hash_map_iterator& j ) { return i.my_node == j.my_node && i.my_map == j.my_map; } template bool operator!=( const hash_map_iterator& i, const hash_map_iterator& j ) { return i.my_node != j.my_node || i.my_map != j.my_map; } // Range class used with concurrent_hash_map template class hash_map_range { using map_type = typename Iterator::map_type; public: // Type for size of a range using size_type = std::size_t; using value_type = typename Iterator::value_type; using reference = typename Iterator::reference; using difference_type = typename Iterator::difference_type; using iterator = Iterator; // True if range is empty. bool empty() const { return my_begin == my_end; } // True if range can be partitioned into two subranges. bool is_divisible() const { return my_midpoint != my_end; } // Split range. hash_map_range( hash_map_range& r, split ) : my_end(r.my_end), my_grainsize(r.my_grainsize) { r.my_end = my_begin = r.my_midpoint; __TBB_ASSERT( !empty(), "Splitting despite the range is not divisible" ); __TBB_ASSERT( !r.empty(), "Splitting despite the range is not divisible" ); set_midpoint(); r.set_midpoint(); } // Init range with container and grainsize specified hash_map_range( const map_type &map, size_type grainsize_ = 1 ) : my_begin( Iterator( map, 0, map.my_embedded_segment, map.my_embedded_segment->node_list.load(std::memory_order_relaxed) ) ), my_end( Iterator( map, map.my_mask.load(std::memory_order_relaxed) + 1, 0, 0 ) ), my_grainsize( grainsize_ ) { __TBB_ASSERT( grainsize_>0, "grainsize must be positive" ); set_midpoint(); } Iterator begin() const { return my_begin; } Iterator end() const { return my_end; } // The grain size for this range. 
size_type grainsize() const { return my_grainsize; } private: Iterator my_begin; Iterator my_end; mutable Iterator my_midpoint; size_t my_grainsize; // Set my_midpoint to point approximately half way between my_begin and my_end. void set_midpoint() const; template friend class hash_map_range; }; template void hash_map_range::set_midpoint() const { // Split by groups of nodes size_t m = my_end.my_index-my_begin.my_index; if( m > my_grainsize ) { m = my_begin.my_index + m/2u; auto b = my_begin.my_map->get_bucket(m); my_midpoint = Iterator(*my_begin.my_map,m,b,b->node_list.load(std::memory_order_relaxed)); } else { my_midpoint = my_end; } __TBB_ASSERT( my_begin.my_index <= my_midpoint.my_index, "my_begin is after my_midpoint" ); __TBB_ASSERT( my_midpoint.my_index <= my_end.my_index, "my_midpoint is after my_end" ); __TBB_ASSERT( my_begin != my_midpoint || my_begin == my_end, "[my_begin, my_midpoint) range should not be empty" ); } template , typename Allocator = tbb_allocator> #if __TBB_PREVIEW_CONCURRENT_HASH_MAP_EXTENSIONS , typename MutexType = spin_rw_mutex > __TBB_requires(tbb::detail::hash_compare && ch_map_rw_scoped_lockable) #else > __TBB_requires(tbb::detail::hash_compare) #endif class concurrent_hash_map #if __TBB_PREVIEW_CONCURRENT_HASH_MAP_EXTENSIONS : protected hash_map_base #else : protected hash_map_base #endif { template friend class hash_map_iterator; template friend class hash_map_range; using allocator_traits_type = tbb::detail::allocator_traits; #if __TBB_PREVIEW_CONCURRENT_HASH_MAP_EXTENSIONS using base_type = hash_map_base; #else using base_type = hash_map_base; #endif public: using key_type = Key; using mapped_type = T; // type_identity is needed to disable implicit deduction guides for std::initializer_list constructors // and copy/move constructor with explicit allocator argument using allocator_type = tbb::detail::type_identity_t; using hash_compare_type = tbb::detail::type_identity_t; using value_type = std::pair; using size_type = typename 
base_type::size_type; using difference_type = std::ptrdiff_t; #if __TBB_PREVIEW_CONCURRENT_HASH_MAP_EXTENSIONS using mutex_type = MutexType; #endif using pointer = typename allocator_traits_type::pointer; using const_pointer = typename allocator_traits_type::const_pointer; using reference = value_type&; using const_reference = const value_type&; using iterator = hash_map_iterator; using const_iterator = hash_map_iterator; using range_type = hash_map_range; using const_range_type = hash_map_range; protected: static_assert(std::is_same::value, "value_type of the container must be the same as its allocator's"); friend class const_accessor; class node; using segment_index_type = typename base_type::segment_index_type; using segment_ptr_type = typename base_type::segment_ptr_type; using node_base = typename base_type::node_base; using bucket = typename base_type::bucket; using hashcode_type = typename base_type::hashcode_type; using bucket_allocator_type = typename base_type::bucket_allocator_type; using node_allocator_type = typename base_type::allocator_traits_type::template rebind_alloc; using node_allocator_traits = tbb::detail::allocator_traits; hash_compare_type my_hash_compare; class node : public node_base { public: node() {} ~node() {} pointer storage() { return &my_value; } value_type& value() { return *storage(); } private: union { value_type my_value; }; }; void delete_node( node_base *n ) { node_allocator_type node_allocator(this->get_allocator()); node_allocator_traits::destroy(node_allocator, static_cast(n)->storage()); node_allocator_traits::destroy(node_allocator, static_cast(n)); node_allocator_traits::deallocate(node_allocator, static_cast(n), 1); } template static node* create_node(bucket_allocator_type& allocator, Args&&... 
args) { node_allocator_type node_allocator(allocator); node* node_ptr = node_allocator_traits::allocate(node_allocator, 1); auto guard = make_raii_guard([&] { node_allocator_traits::destroy(node_allocator, node_ptr); node_allocator_traits::deallocate(node_allocator, node_ptr, 1); }); node_allocator_traits::construct(node_allocator, node_ptr); node_allocator_traits::construct(node_allocator, node_ptr->storage(), std::forward(args)...); guard.dismiss(); return node_ptr; } static node* allocate_node_copy_construct(bucket_allocator_type& allocator, const Key &key, const T * t){ return create_node(allocator, key, *t); } static node* allocate_node_move_construct(bucket_allocator_type& allocator, const Key &key, const T * t){ return create_node(allocator, key, std::move(*const_cast(t))); } template static node* allocate_node_default_construct(bucket_allocator_type& allocator, const K &key, const T * ){ // Emplace construct an empty T object inside the pair return create_node(allocator, std::piecewise_construct, std::forward_as_tuple(key), std::forward_as_tuple()); } static node* do_not_allocate_node(bucket_allocator_type& , const Key &, const T * ){ __TBB_ASSERT(false,"this dummy function should not be called"); return nullptr; } template node *search_bucket( const K &key, bucket *b ) const { node *n = static_cast( b->node_list.load(std::memory_order_relaxed) ); while (this->is_valid(n) && !my_hash_compare.equal(key, n->value().first)) n = static_cast( n->next ); __TBB_ASSERT(!rehash_required(n), "Search can be executed only for rehashed bucket"); return n; } // bucket accessor is to find, rehash, acquire a lock, and access a bucket class bucket_accessor : public bucket::scoped_type { bucket *my_b; public: bucket_accessor( concurrent_hash_map *base, const hashcode_type h, bool writer = false ) { acquire( base, h, writer ); } // find a bucket by masked hashcode, optionally rehash, and acquire the lock inline void acquire( concurrent_hash_map *base, const hashcode_type h, 
bool writer = false ) { my_b = base->get_bucket( h ); // TODO: actually, notification is unnecessary here, just hiding double-check if (rehash_required(my_b->node_list.load(std::memory_order_acquire)) && bucket::scoped_type::try_acquire( my_b->mutex, /*write=*/true ) ) { if (rehash_required(my_b->node_list.load(std::memory_order_relaxed))) base->rehash_bucket(my_b, h); // recursive rehashing } else bucket::scoped_type::acquire( my_b->mutex, writer ); __TBB_ASSERT(!rehash_required(my_b->node_list.load(std::memory_order_relaxed)), nullptr); } // get bucket pointer bucket *operator() () { return my_b; } }; // TODO refactor to hash_base void rehash_bucket( bucket *b_new, const hashcode_type hash ) { __TBB_ASSERT( hash > 1, "The lowermost buckets can't be rehashed" ); b_new->node_list.store(reinterpret_cast(empty_rehashed_flag), std::memory_order_release); // mark rehashed hashcode_type mask = (hashcode_type(1) << tbb::detail::log2(hash)) - 1; // get parent mask from the topmost bit bucket_accessor b_old( this, hash & mask ); mask = (mask<<1) | 1; // get full mask for new bucket __TBB_ASSERT( (mask&(mask+1))==0 && (hash & mask) == hash, nullptr ); restart: node_base* prev = nullptr; node_base* curr = b_old()->node_list.load(std::memory_order_acquire); while (this->is_valid(curr)) { hashcode_type curr_node_hash = my_hash_compare.hash(static_cast(curr)->value().first); if ((curr_node_hash & mask) == hash) { if (!b_old.is_writer()) { if (!b_old.upgrade_to_writer()) { goto restart; // node ptr can be invalid due to concurrent erase } } node_base* next = curr->next; // exclude from b_old if (prev == nullptr) { b_old()->node_list.store(curr->next, std::memory_order_relaxed); } else { prev->next = curr->next; } this->add_to_bucket(b_new, curr); curr = next; } else { prev = curr; curr = curr->next; } } } template using hash_compare_is_transparent = dependent_bool, U>; public: class accessor; // Combines data access, locking, and garbage collection. 
class const_accessor : private node::scoped_type /*which derived from no_copy*/ { #if __TBB_PREVIEW_CONCURRENT_HASH_MAP_EXTENSIONS friend class concurrent_hash_map; #else friend class concurrent_hash_map; #endif friend class accessor; public: // Type of value using value_type = const typename concurrent_hash_map::value_type; // True if result is empty. bool empty() const { return !my_node; } // Set to null void release() { if( my_node ) { node::scoped_type::release(); my_node = nullptr; } } // Return reference to associated value in hash table. const_reference operator*() const { __TBB_ASSERT( my_node, "attempt to dereference empty accessor" ); return my_node->value(); } // Return pointer to associated value in hash table. const_pointer operator->() const { return &operator*(); } // Create empty result const_accessor() : my_node(nullptr), my_hash() {} // Destroy result after releasing the underlying reference. ~const_accessor() { my_node = nullptr; // scoped lock's release() is called in its destructor } protected: bool is_writer() { return node::scoped_type::is_writer(); } node *my_node; hashcode_type my_hash; }; // Allows write access to elements and combines data access, locking, and garbage collection. class accessor: public const_accessor { public: // Type of value using value_type = typename concurrent_hash_map::value_type; // Return reference to associated value in hash table. reference operator*() const { __TBB_ASSERT( this->my_node, "attempt to dereference empty accessor" ); return this->my_node->value(); } // Return pointer to associated value in hash table. 
pointer operator->() const { return &operator*(); } }; explicit concurrent_hash_map( const hash_compare_type& compare, const allocator_type& a = allocator_type() ) : base_type(a) , my_hash_compare(compare) {} concurrent_hash_map() : concurrent_hash_map(hash_compare_type()) {} explicit concurrent_hash_map( const allocator_type& a ) : concurrent_hash_map(hash_compare_type(), a) {} // Construct empty table with n preallocated buckets. This number serves also as initial concurrency level. concurrent_hash_map( size_type n, const allocator_type &a = allocator_type() ) : concurrent_hash_map(a) { this->reserve(n); } concurrent_hash_map( size_type n, const hash_compare_type& compare, const allocator_type& a = allocator_type() ) : concurrent_hash_map(compare, a) { this->reserve(n); } // Copy constructor concurrent_hash_map( const concurrent_hash_map &table ) : concurrent_hash_map(node_allocator_traits::select_on_container_copy_construction(table.get_allocator())) { try_call( [&] { internal_copy(table); }).on_exception( [&] { this->clear(); }); } concurrent_hash_map( const concurrent_hash_map &table, const allocator_type &a) : concurrent_hash_map(a) { try_call( [&] { internal_copy(table); }).on_exception( [&] { this->clear(); }); } // Move constructor concurrent_hash_map( concurrent_hash_map &&table ) : concurrent_hash_map(std::move(table.get_allocator())) { this->internal_move(std::move(table)); } // Move constructor concurrent_hash_map( concurrent_hash_map &&table, const allocator_type &a ) : concurrent_hash_map(a) { using is_equal_type = typename node_allocator_traits::is_always_equal; internal_move_construct_with_allocator(std::move(table), a, is_equal_type()); } // Construction with copying iteration range and given allocator instance template concurrent_hash_map( I first, I last, const allocator_type &a = allocator_type() ) : concurrent_hash_map(a) { try_call( [&] { internal_copy(first, last, std::distance(first, last)); }).on_exception( [&] { this->clear(); }); } 
template concurrent_hash_map( I first, I last, const hash_compare_type& compare, const allocator_type& a = allocator_type() ) : concurrent_hash_map(compare, a) { try_call( [&] { internal_copy(first, last, std::distance(first, last)); }).on_exception( [&] { this->clear(); }); } concurrent_hash_map( std::initializer_list il, const hash_compare_type& compare = hash_compare_type(), const allocator_type& a = allocator_type() ) : concurrent_hash_map(compare, a) { try_call( [&] { internal_copy(il.begin(), il.end(), il.size()); }).on_exception( [&] { this->clear(); }); } concurrent_hash_map( std::initializer_list il, const allocator_type& a ) : concurrent_hash_map(il, hash_compare_type(), a) {} // Assignment concurrent_hash_map& operator=( const concurrent_hash_map &table ) { if( this != &table ) { clear(); copy_assign_allocators(this->my_allocator, table.my_allocator); internal_copy(table); } return *this; } // Move Assignment concurrent_hash_map& operator=( concurrent_hash_map &&table ) { if( this != &table ) { using pocma_type = typename node_allocator_traits::propagate_on_container_move_assignment; using is_equal_type = typename node_allocator_traits::is_always_equal; move_assign_allocators(this->my_allocator, table.my_allocator); internal_move_assign(std::move(table), tbb::detail::disjunction()); } return *this; } // Assignment concurrent_hash_map& operator=( std::initializer_list il ) { clear(); internal_copy(il.begin(), il.end(), il.size()); return *this; } // Rehashes and optionally resizes the whole table. /** Useful to optimize performance before or after concurrent operations. Also enables using of find() and count() concurrent methods in serial context. 
*/ void rehash(size_type sz = 0) { this->reserve(sz); // TODO: add reduction of number of buckets as well hashcode_type mask = this->my_mask.load(std::memory_order_relaxed); hashcode_type b = (mask+1)>>1; // size or first index of the last segment __TBB_ASSERT((b&(b-1))==0, nullptr); // zero or power of 2 bucket *bp = this->get_bucket( b ); // only the last segment should be scanned for rehashing for(; b <= mask; b++, bp++ ) { node_base *n = bp->node_list.load(std::memory_order_relaxed); __TBB_ASSERT( this->is_valid(n) || empty_rehashed(n) || rehash_required(n), "Broken internal structure" ); __TBB_ASSERT( *reinterpret_cast(&bp->mutex) == 0, "concurrent or unexpectedly terminated operation during rehash() execution" ); if (rehash_required(n)) { // rehash bucket, conditional because rehashing of a previous bucket may affect this one hashcode_type h = b; bucket *b_old = bp; do { __TBB_ASSERT( h > 1, "The lowermost buckets can't be rehashed" ); hashcode_type m = ( hashcode_type(1) << tbb::detail::log2( h ) ) - 1; // get parent mask from the topmost bit b_old = this->get_bucket( h &= m ); } while( rehash_required(b_old->node_list.load(std::memory_order_relaxed)) ); // now h - is index of the root rehashed bucket b_old this->mark_rehashed_levels( h ); // mark all non-rehashed children recursively across all segments node_base* prev = nullptr; node_base* curr = b_old->node_list.load(std::memory_order_relaxed); while (this->is_valid(curr)) { hashcode_type curr_node_hash = my_hash_compare.hash(static_cast(curr)->value().first); if ((curr_node_hash & mask) != h) { // should be rehashed node_base* next = curr->next; // exclude from b_old if (prev == nullptr) { b_old->node_list.store(curr->next, std::memory_order_relaxed); } else { prev->next = curr->next; } bucket *b_new = this->get_bucket(curr_node_hash & mask); __TBB_ASSERT(!rehash_required(b_new->node_list.load(std::memory_order_relaxed)), "hash() function changed for key in table or internal error"); 
this->add_to_bucket(b_new, curr); curr = next; } else { prev = curr; curr = curr->next; } } } } } // Clear table void clear() { hashcode_type m = this->my_mask.load(std::memory_order_relaxed); __TBB_ASSERT((m&(m+1))==0, "data structure is invalid"); this->my_size.store(0, std::memory_order_relaxed); segment_index_type s = this->segment_index_of( m ); __TBB_ASSERT( s+1 == this->pointers_per_table || !this->my_table[s+1].load(std::memory_order_relaxed), "wrong mask or concurrent grow" ); do { __TBB_ASSERT(this->is_valid(this->my_table[s].load(std::memory_order_relaxed)), "wrong mask or concurrent grow" ); segment_ptr_type buckets_ptr = this->my_table[s].load(std::memory_order_relaxed); size_type sz = this->segment_size( s ? s : 1 ); for( segment_index_type i = 0; i < sz; i++ ) for( node_base *n = buckets_ptr[i].node_list.load(std::memory_order_relaxed); this->is_valid(n); n = buckets_ptr[i].node_list.load(std::memory_order_relaxed) ) { buckets_ptr[i].node_list.store(n->next, std::memory_order_relaxed); delete_node( n ); } this->delete_segment(s); } while(s-- > 0); this->my_mask.store(this->embedded_buckets - 1, std::memory_order_relaxed); } // Clear table and destroy it. 
~concurrent_hash_map() { clear(); } //------------------------------------------------------------------------ // Parallel algorithm support //------------------------------------------------------------------------ range_type range( size_type grainsize=1 ) { return range_type( *this, grainsize ); } const_range_type range( size_type grainsize=1 ) const { return const_range_type( *this, grainsize ); } //------------------------------------------------------------------------ // STL support - not thread-safe methods //------------------------------------------------------------------------ iterator begin() { return iterator( *this, 0, this->my_embedded_segment, this->my_embedded_segment->node_list.load(std::memory_order_relaxed) ); } const_iterator begin() const { return const_iterator( *this, 0, this->my_embedded_segment, this->my_embedded_segment->node_list.load(std::memory_order_relaxed) ); } const_iterator cbegin() const { return const_iterator( *this, 0, this->my_embedded_segment, this->my_embedded_segment->node_list.load(std::memory_order_relaxed) ); } iterator end() { return iterator( *this, 0, 0, 0 ); } const_iterator end() const { return const_iterator( *this, 0, 0, 0 ); } const_iterator cend() const { return const_iterator( *this, 0, 0, 0 ); } std::pair equal_range( const Key& key ) { return internal_equal_range( key, end() ); } std::pair equal_range( const Key& key ) const { return internal_equal_range( key, end() ); } template typename std::enable_if::value, std::pair>::type equal_range( const K& key ) { return internal_equal_range(key, end()); } template typename std::enable_if::value, std::pair>::type equal_range( const K& key ) const { return internal_equal_range(key, end()); } // Number of items in table. size_type size() const { return this->my_size.load(std::memory_order_acquire); } // True if size()==0. __TBB_nodiscard bool empty() const { return size() == 0; } // Upper bound on size. 
size_type max_size() const { return allocator_traits_type::max_size(base_type::get_allocator()); } // Returns the current number of buckets size_type bucket_count() const { return this->my_mask.load(std::memory_order_relaxed) + 1; } // return allocator object allocator_type get_allocator() const { return base_type::get_allocator(); } // swap two instances. Iterators are invalidated void swap(concurrent_hash_map& table) { using pocs_type = typename node_allocator_traits::propagate_on_container_swap; using is_equal_type = typename node_allocator_traits::is_always_equal; swap_allocators(this->my_allocator, table.my_allocator); internal_swap(table, tbb::detail::disjunction()); } //------------------------------------------------------------------------ // concurrent map operations //------------------------------------------------------------------------ // Return count of items (0 or 1) size_type count( const Key &key ) const { return const_cast(this)->lookup(key, nullptr, nullptr, /*write=*/false, &do_not_allocate_node); } template typename std::enable_if::value, size_type>::type count( const K& key ) const { return const_cast(this)->lookup(key, nullptr, nullptr, /*write=*/false, &do_not_allocate_node); } // Find item and acquire a read lock on the item. /** Return true if item is found, false otherwise. */ bool find( const_accessor &result, const Key &key ) const { result.release(); return const_cast(this)->lookup(key, nullptr, &result, /*write=*/false, &do_not_allocate_node ); } // Find item and acquire a write lock on the item. /** Return true if item is found, false otherwise. 
*/ bool find( accessor &result, const Key &key ) { result.release(); return lookup(key, nullptr, &result, /*write=*/true, &do_not_allocate_node); } template typename std::enable_if::value, bool>::type find( const_accessor& result, const K& key ) { result.release(); return lookup(key, nullptr, &result, /*write=*/false, &do_not_allocate_node); } template typename std::enable_if::value, bool>::type find( accessor& result, const K& key ) { result.release(); return lookup(key, nullptr, &result, /*write=*/true, &do_not_allocate_node); } // Insert item (if not already present) and acquire a read lock on the item. /** Returns true if item is new. */ bool insert( const_accessor &result, const Key &key ) { result.release(); return lookup(key, nullptr, &result, /*write=*/false, &allocate_node_default_construct<>); } // Insert item (if not already present) and acquire a write lock on the item. /** Returns true if item is new. */ bool insert( accessor &result, const Key &key ) { result.release(); return lookup(key, nullptr, &result, /*write=*/true, &allocate_node_default_construct<>); } template typename std::enable_if::value && std::is_constructible::value, bool>::type insert( const_accessor& result, const K& key ) { result.release(); return lookup(key, nullptr, &result, /*write=*/true, &allocate_node_default_construct); } template typename std::enable_if::value && std::is_constructible::value, bool>::type insert( accessor& result, const K& key ) { result.release(); return lookup(key, nullptr, &result, /*write=*/true, &allocate_node_default_construct); } // Insert item by copying if there is no such key present already and acquire a read lock on the item. /** Returns true if item is new. */ bool insert( const_accessor &result, const value_type &value ) { result.release(); return lookup(value.first, &value.second, &result, /*write=*/false, &allocate_node_copy_construct); } // Insert item by copying if there is no such key present already and acquire a write lock on the item. 
/** Returns true if item is new. */ bool insert( accessor &result, const value_type &value ) { result.release(); return lookup(value.first, &value.second, &result, /*write=*/true, &allocate_node_copy_construct); } // Insert item by copying if there is no such key present already /** Returns true if item is inserted. */ bool insert( const value_type &value ) { return lookup(value.first, &value.second, nullptr, /*write=*/false, &allocate_node_copy_construct); } // Insert item by copying if there is no such key present already and acquire a read lock on the item. /** Returns true if item is new. */ bool insert( const_accessor &result, value_type && value ) { return generic_move_insert(result, std::move(value)); } // Insert item by copying if there is no such key present already and acquire a write lock on the item. /** Returns true if item is new. */ bool insert( accessor &result, value_type && value ) { return generic_move_insert(result, std::move(value)); } // Insert item by copying if there is no such key present already /** Returns true if item is inserted. */ bool insert( value_type && value ) { return generic_move_insert(accessor_not_used(), std::move(value)); } // Insert item by copying if there is no such key present already and acquire a read lock on the item. /** Returns true if item is new. */ template bool emplace( const_accessor &result, Args&&... args ) { return generic_emplace(result, std::forward(args)...); } // Insert item by copying if there is no such key present already and acquire a write lock on the item. /** Returns true if item is new. */ template bool emplace( accessor &result, Args&&... args ) { return generic_emplace(result, std::forward(args)...); } // Insert item by copying if there is no such key present already /** Returns true if item is inserted. */ template bool emplace( Args&&... 
args ) { return generic_emplace(accessor_not_used(), std::forward(args)...); } // Insert range [first, last) template void insert( I first, I last ) { for ( ; first != last; ++first ) insert( *first ); } // Insert initializer list void insert( std::initializer_list il ) { insert( il.begin(), il.end() ); } // Erase item. /** Return true if item was erased by particularly this call. */ bool erase( const Key &key ) { return internal_erase(key); } template typename std::enable_if::value, bool>::type erase( const K& key ) { return internal_erase(key); } // Erase item by const_accessor. /** Return true if item was erased by particularly this call. */ bool erase( const_accessor& item_accessor ) { return exclude( item_accessor ); } // Erase item by accessor. /** Return true if item was erased by particularly this call. */ bool erase( accessor& item_accessor ) { return exclude( item_accessor ); } protected: template node* allocate_node_helper( const K& key, const T* t, AllocateNodeType allocate_node, std::true_type ) { return allocate_node(base_type::get_allocator(), key, t); } template node* allocate_node_helper( const K&, const T*, AllocateNodeType, std::false_type ) { __TBB_ASSERT(false, "allocate_node_helper with std::false_type should never been called"); return nullptr; } // Insert or find item and optionally acquire a lock on the item. 
template bool lookup( const K &key, const T *t, const_accessor *result, bool write, AllocateNodeType allocate_node, node *tmp_n = 0) { __TBB_ASSERT( !result || !result->my_node, nullptr ); bool return_value; hashcode_type const h = my_hash_compare.hash( key ); hashcode_type m = this->my_mask.load(std::memory_order_acquire); segment_index_type grow_segment = 0; node *n; restart: {//lock scope __TBB_ASSERT((m&(m+1))==0, "data structure is invalid"); return_value = false; // get bucket bucket_accessor b( this, h & m ); // find a node n = search_bucket( key, b() ); if( OpInsert ) { // [opt] insert a key if( !n ) { if( !tmp_n ) { tmp_n = allocate_node_helper(key, t, allocate_node, std::integral_constant{}); } while ( !b.is_writer() && !b.upgrade_to_writer() ) { // TODO: improved insertion // Rerun search list, in case another thread inserted the intem during the upgrade n = search_bucket(key, b()); if (this->is_valid(n)) { // unfortunately, it did if (!b.downgrade_to_reader()) { // If the lock was downgraded with reacquiring the mutex // Rerun search list in case another thread removed the item during the downgrade n = search_bucket(key, b()); if (!this->is_valid(n)) { // Unfortunately, it did // We need to try upgrading to writer again continue; } } goto exists; } } if( this->check_mask_race(h, m) ) goto restart; // b.release() is done in ~b(). // insert and set flag to grow the container grow_segment = this->insert_new_node( b(), n = tmp_n, m ); tmp_n = 0; return_value = true; } } else { // find or count if( !n ) { if( this->check_mask_race( h, m ) ) goto restart; // b.release() is done in ~b(). 
TODO: replace by continue return false; } return_value = true; } exists: if( !result ) goto check_growth; // TODO: the following seems as generic/regular operation // acquire the item if( !result->try_acquire( n->mutex, write ) ) { for( tbb::detail::atomic_backoff backoff(true);; ) { if( result->try_acquire( n->mutex, write ) ) break; if( !backoff.bounded_pause() ) { // the wait takes really long, restart the operation b.release(); __TBB_ASSERT( !OpInsert || !return_value, "Can't acquire new item in locked bucket?" ); yield(); m = this->my_mask.load(std::memory_order_acquire); goto restart; } } } }//lock scope result->my_node = n; result->my_hash = h; check_growth: // [opt] grow the container if( grow_segment ) { this->enable_segment( grow_segment ); } if( tmp_n ) // if OpInsert only delete_node( tmp_n ); return return_value; } struct accessor_not_used { void release(){}}; friend const_accessor* accessor_location( accessor_not_used const& ){ return nullptr;} friend const_accessor* accessor_location( const_accessor & a ) { return &a;} friend bool is_write_access_needed( accessor const& ) { return true;} friend bool is_write_access_needed( const_accessor const& ) { return false;} friend bool is_write_access_needed( accessor_not_used const& ) { return false;} template bool generic_move_insert( Accessor && result, value_type && value ) { result.release(); return lookup(value.first, &value.second, accessor_location(result), is_write_access_needed(result), &allocate_node_move_construct); } template bool generic_emplace( Accessor && result, Args &&... 
args ) { result.release(); node * node_ptr = create_node(base_type::get_allocator(), std::forward(args)...); return lookup(node_ptr->value().first, nullptr, accessor_location(result), is_write_access_needed(result), &do_not_allocate_node, node_ptr); } // delete item by accessor bool exclude( const_accessor &item_accessor ) { __TBB_ASSERT( item_accessor.my_node, nullptr ); node_base *const exclude_node = item_accessor.my_node; hashcode_type const hash = item_accessor.my_hash; hashcode_type mask = this->my_mask.load(std::memory_order_acquire); do { // get bucket bucket_accessor b( this, hash & mask, /*writer=*/true ); node_base* prev = nullptr; node_base* curr = b()->node_list.load(std::memory_order_relaxed); while (curr && curr != exclude_node) { prev = curr; curr = curr->next; } if (curr == nullptr) { // someone else was first if (this->check_mask_race(hash, mask)) continue; item_accessor.release(); return false; } __TBB_ASSERT( curr == exclude_node, nullptr ); // remove from container if (prev == nullptr) { b()->node_list.store(curr->next, std::memory_order_relaxed); } else { prev->next = curr->next; } this->my_size--; break; } while(true); if (!item_accessor.is_writer()) { // need to get exclusive lock item_accessor.upgrade_to_writer(); // return value means nothing here } item_accessor.release(); delete_node(exclude_node); // Only one thread can delete it return true; } template bool internal_erase( const K& key ) { node_base *erase_node; hashcode_type const hash = my_hash_compare.hash(key); hashcode_type mask = this->my_mask.load(std::memory_order_acquire); restart: {//lock scope // get bucket bucket_accessor b( this, hash & mask ); search: node_base* prev = nullptr; erase_node = b()->node_list.load(std::memory_order_relaxed); while (this->is_valid(erase_node) && !my_hash_compare.equal(key, static_cast(erase_node)->value().first ) ) { prev = erase_node; erase_node = erase_node->next; } if (erase_node == nullptr) { // not found, but mask could be changed if 
(this->check_mask_race(hash, mask)) goto restart; return false; } else if (!b.is_writer() && !b.upgrade_to_writer()) { if (this->check_mask_race(hash, mask)) // contended upgrade, check mask goto restart; goto search; } // remove from container if (prev == nullptr) { b()->node_list.store(erase_node->next, std::memory_order_relaxed); } else { prev->next = erase_node->next; } this->my_size--; } { typename node::scoped_type item_locker( erase_node->mutex, /*write=*/true ); } // note: there should be no threads pretending to acquire this mutex again, do not try to upgrade const_accessor! delete_node(erase_node); // Only one thread can delete it due to write lock on the bucket return true; } // Returns an iterator for an item defined by the key, or for the next item after it (if upper==true) template std::pair internal_equal_range( const K& key, I end_ ) const { hashcode_type h = my_hash_compare.hash( key ); hashcode_type m = this->my_mask.load(std::memory_order_relaxed); __TBB_ASSERT((m&(m+1))==0, "data structure is invalid"); h &= m; bucket *b = this->get_bucket( h ); while (rehash_required(b->node_list.load(std::memory_order_relaxed))) { m = ( hashcode_type(1) << tbb::detail::log2( h ) ) - 1; // get parent mask from the topmost bit b = this->get_bucket( h &= m ); } node *n = search_bucket( key, b ); if( !n ) return std::make_pair(end_, end_); iterator lower(*this, h, b, n), upper(lower); return std::make_pair(lower, ++upper); } // Copy "source" to *this, where *this must start out empty. void internal_copy( const concurrent_hash_map& source ) { hashcode_type mask = source.my_mask.load(std::memory_order_relaxed); if( this->my_mask.load(std::memory_order_relaxed) == mask ) { // optimized version this->reserve(source.my_size.load(std::memory_order_relaxed)); // TODO: load_factor? 
bucket *dst = 0, *src = 0; bool rehashing_required = false; for( hashcode_type k = 0; k <= mask; k++ ) { if( k & (k-2) ) ++dst,src++; // not the beginning of a segment else { dst = this->get_bucket( k ); src = source.get_bucket( k ); } __TBB_ASSERT(!rehash_required(dst->node_list.load(std::memory_order_relaxed)), "Invalid bucket in destination table"); node *n = static_cast( src->node_list.load(std::memory_order_relaxed) ); if (rehash_required(n)) { // source is not rehashed, items are in previous buckets rehashing_required = true; dst->node_list.store(reinterpret_cast(rehash_req_flag), std::memory_order_relaxed); } else for(; n; n = static_cast( n->next ) ) { node* node_ptr = create_node(base_type::get_allocator(), n->value().first, n->value().second); this->add_to_bucket( dst, node_ptr); this->my_size.fetch_add(1, std::memory_order_relaxed); } } if( rehashing_required ) rehash(); } else internal_copy(source.begin(), source.end(), source.my_size.load(std::memory_order_relaxed)); } template void internal_copy( I first, I last, size_type reserve_size ) { this->reserve(reserve_size); // TODO: load_factor? 
hashcode_type m = this->my_mask.load(std::memory_order_relaxed); for(; first != last; ++first) { hashcode_type h = my_hash_compare.hash( (*first).first ); bucket *b = this->get_bucket( h & m ); __TBB_ASSERT(!rehash_required(b->node_list.load(std::memory_order_relaxed)), "Invalid bucket in destination table"); node* node_ptr = create_node(base_type::get_allocator(), (*first).first, (*first).second); this->add_to_bucket( b, node_ptr ); ++this->my_size; // TODO: replace by non-atomic op } } void internal_move_construct_with_allocator( concurrent_hash_map&& other, const allocator_type&, /*is_always_equal=*/std::true_type ) { this->internal_move(std::move(other)); } void internal_move_construct_with_allocator( concurrent_hash_map&& other, const allocator_type& a, /*is_always_equal=*/std::false_type ) { if (a == other.get_allocator()){ this->internal_move(std::move(other)); } else { try_call( [&] { internal_copy(std::make_move_iterator(other.begin()), std::make_move_iterator(other.end()), other.size()); }).on_exception( [&] { this->clear(); }); } } void internal_move_assign( concurrent_hash_map&& other, /*is_always_equal || POCMA = */std::true_type) { this->internal_move(std::move(other)); } void internal_move_assign(concurrent_hash_map&& other, /*is_always_equal=*/ std::false_type) { if (this->my_allocator == other.my_allocator) { this->internal_move(std::move(other)); } else { //do per element move internal_copy(std::make_move_iterator(other.begin()), std::make_move_iterator(other.end()), other.size()); } } void internal_swap(concurrent_hash_map& other, /*is_always_equal || POCS = */ std::true_type) { this->internal_swap_content(other); } void internal_swap(concurrent_hash_map& other, /*is_always_equal || POCS = */ std::false_type) { __TBB_ASSERT(this->my_allocator == other.my_allocator, nullptr); this->internal_swap_content(other); } // Fast find when no concurrent erasure is used. For internal use inside TBB only! 
/** Return pointer to item with given key, or nullptr if no such item exists. Must not be called concurrently with erasure operations. */ const_pointer internal_fast_find( const Key& key ) const { hashcode_type h = my_hash_compare.hash( key ); hashcode_type m = this->my_mask.load(std::memory_order_acquire); node *n; restart: __TBB_ASSERT((m&(m+1))==0, "data structure is invalid"); bucket *b = this->get_bucket( h & m ); // TODO: actually, notification is unnecessary here, just hiding double-check if (rehash_required(b->node_list.load(std::memory_order_acquire))) { typename bucket::scoped_type lock; if( lock.try_acquire( b->mutex, /*write=*/true ) ) { if (rehash_required(b->node_list.load(std::memory_order_relaxed))) const_cast(this)->rehash_bucket( b, h & m ); //recursive rehashing } else lock.acquire( b->mutex, /*write=*/false ); __TBB_ASSERT(!rehash_required(b->node_list.load(std::memory_order_relaxed)), nullptr); } n = search_bucket( key, b ); if( n ) return n->storage(); else if( this->check_mask_race( h, m ) ) goto restart; return 0; } }; #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT template >, typename Alloc = tbb_allocator>, typename = std::enable_if_t>, typename = std::enable_if_t>, typename = std::enable_if_t>> concurrent_hash_map( It, It, HashCompare = HashCompare(), Alloc = Alloc() ) -> concurrent_hash_map, iterator_mapped_t, HashCompare, Alloc>; template >, typename = std::enable_if_t>> concurrent_hash_map( It, It, Alloc ) -> concurrent_hash_map, iterator_mapped_t, d1::tbb_hash_compare>, Alloc>; template >, typename Alloc = tbb_allocator>, typename = std::enable_if_t>, typename = std::enable_if_t>> concurrent_hash_map( std::initializer_list>, HashCompare = HashCompare(), Alloc = Alloc() ) -> concurrent_hash_map, T, HashCompare, Alloc>; template >> concurrent_hash_map( std::initializer_list>, Alloc ) -> concurrent_hash_map, T, d1::tbb_hash_compare>, Alloc>; #endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */ template inline bool operator==(const 
concurrent_hash_map &a, const concurrent_hash_map &b) { if(a.size() != b.size()) return false; typename concurrent_hash_map::const_iterator i(a.begin()), i_end(a.end()); typename concurrent_hash_map::const_iterator j, j_end(b.end()); for(; i != i_end; ++i) { j = b.equal_range(i->first).first; if( j == j_end || !(i->second == j->second) ) return false; } return true; } #if !__TBB_CPP20_COMPARISONS_PRESENT template inline bool operator!=(const concurrent_hash_map &a, const concurrent_hash_map &b) { return !(a == b); } #endif // !__TBB_CPP20_COMPARISONS_PRESENT template inline void swap(concurrent_hash_map &a, concurrent_hash_map &b) { a.swap( b ); } } // namespace d2 } // namespace detail inline namespace v1 { using detail::split; using detail::d2::concurrent_hash_map; using detail::d1::tbb_hash_compare; } // namespace v1 } // namespace tbb #endif /* __TBB_concurrent_hash_map_H */ level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/concurrent_lru_cache.h000066400000000000000000000326511514453371700331470ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_concurrent_lru_cache_H #define __TBB_concurrent_lru_cache_H #if ! 
TBB_PREVIEW_CONCURRENT_LRU_CACHE #error Set TBB_PREVIEW_CONCURRENT_LRU_CACHE to include concurrent_lru_cache.h #endif #include "detail/_assert.h" #include "detail/_aggregator.h" #include // for std::map #include // for std::list #include // for std::make_pair #include // for std::find #include // for std::atomic namespace tbb { namespace detail { namespace d1 { //----------------------------------------------------------------------------- // Concurrent LRU cache //----------------------------------------------------------------------------- template class concurrent_lru_cache : no_assign { // incapsulated helper classes private: struct handle_object; struct storage_map_value_type; struct aggregator_operation; struct retrieve_aggregator_operation; struct signal_end_of_usage_aggregator_operation; // typedefs public: using key_type = KeyT; using value_type = ValT; using pointer = ValT*; using reference = ValT&; using const_pointer = const ValT*; using const_reference = const ValT&; using value_function_type = KeyToValFunctorT; using handle = handle_object; private: using lru_cache_type = concurrent_lru_cache; using storage_map_type = std::map; using storage_map_iterator_type = typename storage_map_type::iterator; using storage_map_pointer_type = typename storage_map_type::pointer; using storage_map_reference_type = typename storage_map_type::reference; using history_list_type = std::list; using history_list_iterator_type = typename history_list_type::iterator; using aggregator_operation_type = aggregator_operation; using aggregator_function_type = aggregating_functor; using aggregator_type = aggregator; friend class aggregating_functor; // fields private: value_function_type my_value_function; aggregator_type my_aggregator; storage_map_type my_storage_map; // storage map for used objects history_list_type my_history_list; // history list for unused objects const std::size_t my_history_list_capacity; // history list's allowed capacity // interface public: 
concurrent_lru_cache(value_function_type value_function, std::size_t cache_capacity) : my_value_function(value_function), my_history_list_capacity(cache_capacity) { my_aggregator.initialize_handler(aggregator_function_type(this)); } handle operator[](key_type key) { retrieve_aggregator_operation op(key); my_aggregator.execute(&op); if (op.is_new_value_needed()) { op.result().second.my_value = my_value_function(key); op.result().second.my_is_ready.store(true, std::memory_order_release); } else { spin_wait_while_eq(op.result().second.my_is_ready, false); } return handle(*this, op.result()); } private: void handle_operations(aggregator_operation* op_list) { while (op_list) { op_list->cast_and_handle(*this); aggregator_operation* prev_op = op_list; op_list = op_list->next; (prev_op->status).store(1, std::memory_order_release); } } void signal_end_of_usage(storage_map_reference_type map_record_ref) { signal_end_of_usage_aggregator_operation op(map_record_ref); my_aggregator.execute(&op); } void signal_end_of_usage_serial(storage_map_reference_type map_record_ref) { storage_map_iterator_type map_it = my_storage_map.find(map_record_ref.first); __TBB_ASSERT(map_it != my_storage_map.end(), "cache should not return past-end iterators to outer world"); __TBB_ASSERT(&(*map_it) == &map_record_ref, "dangling reference has been returned to outside world: data race?"); __TBB_ASSERT(std::find(my_history_list.begin(), my_history_list.end(), map_it) == my_history_list.end(), "object in use should not be in list of unused objects "); // if it was the last reference, put it to the LRU history if (! 
--(map_it->second.my_ref_counter)) { // if the LRU history is full, evict the oldest items to get space if (my_history_list.size() >= my_history_list_capacity) { std::size_t number_of_elements_to_evict = 1 + my_history_list.size() - my_history_list_capacity; for (std::size_t i = 0; i < number_of_elements_to_evict; ++i) { storage_map_iterator_type map_it_to_evict = my_history_list.back(); __TBB_ASSERT(map_it_to_evict->second.my_ref_counter == 0, "item to be evicted should not have a live references"); // TODO: can we use forward_list instead of list? pop_front / insert_after last my_history_list.pop_back(); my_storage_map.erase(map_it_to_evict); } } // TODO: can we use forward_list instead of list? pop_front / insert_after last my_history_list.push_front(map_it); map_it->second.my_history_list_iterator = my_history_list.begin(); } } storage_map_reference_type retrieve_serial(key_type key, bool& is_new_value_needed) { storage_map_iterator_type map_it = my_storage_map.find(key); if (map_it == my_storage_map.end()) { map_it = my_storage_map.emplace_hint( map_it, std::piecewise_construct, std::make_tuple(key), std::make_tuple(value_type(), 0, my_history_list.end(), false)); is_new_value_needed = true; } else { history_list_iterator_type list_it = map_it->second.my_history_list_iterator; if (list_it != my_history_list.end()) { __TBB_ASSERT(map_it->second.my_ref_counter == 0, "item to be evicted should not have a live references"); // Item is going to be used. Therefore it is not a subject for eviction, // so we remove it from LRU history. 
my_history_list.erase(list_it); map_it->second.my_history_list_iterator = my_history_list.end(); } } ++(map_it->second.my_ref_counter); return *map_it; } }; //----------------------------------------------------------------------------- // Value type for storage map in concurrent LRU cache //----------------------------------------------------------------------------- template struct concurrent_lru_cache::storage_map_value_type { //typedefs public: using ref_counter_type = std::size_t; // fields public: value_type my_value; ref_counter_type my_ref_counter; history_list_iterator_type my_history_list_iterator; std::atomic my_is_ready; // interface public: storage_map_value_type( value_type const& value, ref_counter_type ref_counter, history_list_iterator_type history_list_iterator, bool is_ready) : my_value(value), my_ref_counter(ref_counter), my_history_list_iterator(history_list_iterator), my_is_ready(is_ready) {} }; //----------------------------------------------------------------------------- // Handle object for operator[] in concurrent LRU cache //----------------------------------------------------------------------------- template struct concurrent_lru_cache::handle_object { // fields private: lru_cache_type* my_lru_cache_ptr; storage_map_pointer_type my_map_record_ptr; // interface public: handle_object() : my_lru_cache_ptr(nullptr), my_map_record_ptr(nullptr) {} handle_object(lru_cache_type& lru_cache_ref, storage_map_reference_type map_record_ref) : my_lru_cache_ptr(&lru_cache_ref), my_map_record_ptr(&map_record_ref) {} handle_object(handle_object&) = delete; void operator=(handle_object&) = delete; handle_object(handle_object&& other) : my_lru_cache_ptr(other.my_lru_cache_ptr), my_map_record_ptr(other.my_map_record_ptr) { __TBB_ASSERT( (other.my_lru_cache_ptr != nullptr && other.my_map_record_ptr != nullptr) || (other.my_lru_cache_ptr == nullptr && other.my_map_record_ptr == nullptr), "invalid state of moving object?"); other.my_lru_cache_ptr = nullptr; 
other.my_map_record_ptr = nullptr; } handle_object& operator=(handle_object&& other) { __TBB_ASSERT( (other.my_lru_cache_ptr != nullptr && other.my_map_record_ptr != nullptr) || (other.my_lru_cache_ptr == nullptr && other.my_map_record_ptr == nullptr), "invalid state of moving object?"); if (my_lru_cache_ptr) my_lru_cache_ptr->signal_end_of_usage(*my_map_record_ptr); my_lru_cache_ptr = other.my_lru_cache_ptr; my_map_record_ptr = other.my_map_record_ptr; other.my_lru_cache_ptr = nullptr; other.my_map_record_ptr = nullptr; return *this; } ~handle_object() { if (my_lru_cache_ptr) my_lru_cache_ptr->signal_end_of_usage(*my_map_record_ptr); } operator bool() const { return (my_lru_cache_ptr && my_map_record_ptr); } value_type& value() { __TBB_ASSERT(my_lru_cache_ptr, "get value from already moved object?"); __TBB_ASSERT(my_map_record_ptr, "get value from an invalid or already moved object?"); return my_map_record_ptr->second.my_value; } }; //----------------------------------------------------------------------------- // Aggregator operation for aggregator type in concurrent LRU cache //----------------------------------------------------------------------------- template struct concurrent_lru_cache::aggregator_operation : aggregated_operation { // incapsulated helper classes public: enum class op_type { retrieve, signal_end_of_usage }; // fields private: op_type my_op; // interface public: aggregator_operation(op_type op) : my_op(op) {} // TODO: aggregator_operation can be implemented // - as a statically typed variant type or CRTP? 
(static, dependent on the use case) // - or use pointer to function and apply_visitor (dynamic) // - or use virtual functions (dynamic) void cast_and_handle(lru_cache_type& lru_cache_ref) { if (my_op == op_type::retrieve) static_cast(this)->handle(lru_cache_ref); else static_cast(this)->handle(lru_cache_ref); } }; template struct concurrent_lru_cache::retrieve_aggregator_operation : aggregator_operation, private no_assign { public: key_type my_key; storage_map_pointer_type my_map_record_ptr; bool my_is_new_value_needed; public: retrieve_aggregator_operation(key_type key) : aggregator_operation(aggregator_operation::op_type::retrieve), my_key(key), my_is_new_value_needed(false) {} void handle(lru_cache_type& lru_cache_ref) { my_map_record_ptr = &lru_cache_ref.retrieve_serial(my_key, my_is_new_value_needed); } storage_map_reference_type result() { return *my_map_record_ptr; } bool is_new_value_needed() { return my_is_new_value_needed; } }; template struct concurrent_lru_cache::signal_end_of_usage_aggregator_operation : aggregator_operation, private no_assign { private: storage_map_reference_type my_map_record_ref; public: signal_end_of_usage_aggregator_operation(storage_map_reference_type map_record_ref) : aggregator_operation(aggregator_operation::op_type::signal_end_of_usage), my_map_record_ref(map_record_ref) {} void handle(lru_cache_type& lru_cache_ref) { lru_cache_ref.signal_end_of_usage_serial(my_map_record_ref); } }; // TODO: if we have guarantees that KeyToValFunctorT always have // ValT as a return type and KeyT as an argument type // we can deduce template parameters of concurrent_lru_cache // by pattern matching on KeyToValFunctorT } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::concurrent_lru_cache; } // inline namespace v1 } // namespace tbb #endif // __TBB_concurrent_lru_cache_H 
level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/concurrent_map.h000066400000000000000000000324751514453371700320030ustar00rootroot00000000000000/* Copyright (c) 2019-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_concurrent_map_H #define __TBB_concurrent_map_H #include "detail/_namespace_injection.h" #include "detail/_concurrent_skip_list.h" #include "tbb_allocator.h" #include #include #include namespace tbb { namespace detail { namespace d2 { template struct map_traits { static constexpr std::size_t max_level = RandomGenerator::max_level; using random_level_generator_type = RandomGenerator; using key_type = Key; using mapped_type = Value; using compare_type = KeyCompare; using value_type = std::pair; using reference = value_type&; using const_reference = const value_type&; using allocator_type = Allocator; static constexpr bool allow_multimapping = AllowMultimapping; class value_compare { public: bool operator()(const value_type& lhs, const value_type& rhs) const { return comp(lhs.first, rhs.first); } protected: value_compare(compare_type c) : comp(c) {} friend struct map_traits; compare_type comp; }; static value_compare value_comp(compare_type comp) { return value_compare(comp); } static const key_type& get_key(const_reference val) { return val.first; } }; // struct map_traits template class concurrent_multimap; template , typename Allocator = tbb::tbb_allocator>> class concurrent_map : public concurrent_skip_list, Allocator, 
false>> { using base_type = concurrent_skip_list, Allocator, false>>; public: using key_type = Key; using mapped_type = Value; using value_type = typename base_type::value_type; using size_type = typename base_type::size_type; using difference_type = typename base_type::difference_type; using key_compare = Compare; using value_compare = typename base_type::value_compare; using allocator_type = Allocator; using reference = typename base_type::reference; using const_reference = typename base_type::const_reference; using pointer = typename base_type::pointer; using const_pointer = typename base_type::const_pointer; using iterator = typename base_type::iterator; using const_iterator = typename base_type::const_iterator; using node_type = typename base_type::node_type; // Include constructors of base type using base_type::base_type; // Required for implicit deduction guides concurrent_map() = default; concurrent_map( const concurrent_map& ) = default; concurrent_map( const concurrent_map& other, const allocator_type& alloc ) : base_type(other, alloc) {} concurrent_map( concurrent_map&& ) = default; concurrent_map( concurrent_map&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} // Required to respect the rule of 5 concurrent_map& operator=( const concurrent_map& ) = default; concurrent_map& operator=( concurrent_map&& ) = default; concurrent_map& operator=( std::initializer_list il ) { base_type::operator= (il); return *this; } // Observers mapped_type& at(const key_type& key) { iterator it = this->find(key); if (it == this->end()) { throw_exception(exception_id::invalid_key); } return it->second; } const mapped_type& at(const key_type& key) const { return const_cast(this)->at(key); } mapped_type& operator[](const key_type& key) { iterator it = this->find(key); if (it == this->end()) { it = this->emplace(std::piecewise_construct, std::forward_as_tuple(key), std::tuple<>()).first; } return it->second; } mapped_type& operator[](key_type&& key) 
{ iterator it = this->find(key); if (it == this->end()) { it = this->emplace(std::piecewise_construct, std::forward_as_tuple(std::move(key)), std::tuple<>()).first; } return it->second; } using base_type::insert; template typename std::enable_if::value, std::pair>::type insert( P&& value ) { return this->emplace(std::forward

(value)); } template typename std::enable_if::value, iterator>::type insert( const_iterator hint, P&& value ) { return this->emplace_hint(hint, std::forward

(value)); } template void merge(concurrent_map& source) { this->internal_merge(source); } template void merge(concurrent_map&& source) { this->internal_merge(std::move(source)); } template void merge(concurrent_multimap& source) { this->internal_merge(source); } template void merge(concurrent_multimap&& source) { this->internal_merge(std::move(source)); } }; // class concurrent_map #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT template >, typename Alloc = tbb::tbb_allocator>, typename = std::enable_if_t>, typename = std::enable_if_t>, typename = std::enable_if_t>> concurrent_map( It, It, Comp = Comp(), Alloc = Alloc() ) -> concurrent_map, iterator_mapped_t, Comp, Alloc>; template >, typename Alloc = tbb::tbb_allocator>, typename = std::enable_if_t>, typename = std::enable_if_t>> concurrent_map( std::initializer_list>, Comp = Comp(), Alloc = Alloc() ) -> concurrent_map, T, Comp, Alloc>; template >, typename = std::enable_if_t>> concurrent_map( It, It, Alloc ) -> concurrent_map, iterator_mapped_t, std::less>, Alloc>; template >> concurrent_map( std::initializer_list>, Alloc ) -> concurrent_map, T, std::less>, Alloc>; #endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT template void swap( concurrent_map& lhs, concurrent_map& rhs ) { lhs.swap(rhs); } template , typename Allocator = tbb::tbb_allocator>> class concurrent_multimap : public concurrent_skip_list, Allocator, true>> { using base_type = concurrent_skip_list, Allocator, true>>; public: using key_type = Key; using mapped_type = Value; using value_type = typename base_type::value_type; using size_type = typename base_type::size_type; using difference_type = typename base_type::difference_type; using key_compare = Compare; using value_compare = typename base_type::value_compare; using allocator_type = Allocator; using reference = typename base_type::reference; using const_reference = typename base_type::const_reference; using pointer = typename base_type::pointer; using const_pointer = typename base_type::const_pointer; 
using iterator = typename base_type::iterator; using const_iterator = typename base_type::const_iterator; using node_type = typename base_type::node_type; // Include constructors of base_type using base_type::base_type; using base_type::insert; // Required for implicit deduction guides concurrent_multimap() = default; concurrent_multimap( const concurrent_multimap& ) = default; concurrent_multimap( const concurrent_multimap& other, const allocator_type& alloc ) : base_type(other, alloc) {} concurrent_multimap( concurrent_multimap&& ) = default; concurrent_multimap( concurrent_multimap&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} // Required to respect the rule of 5 concurrent_multimap& operator=( const concurrent_multimap& ) = default; concurrent_multimap& operator=( concurrent_multimap&& ) = default; concurrent_multimap& operator=( std::initializer_list il ) { base_type::operator= (il); return *this; } template typename std::enable_if::value, std::pair>::type insert( P&& value ) { return this->emplace(std::forward

(value)); } template typename std::enable_if::value, iterator>::type insert( const_iterator hint, P&& value ) { return this->emplace_hint(hint, std::forward

(value)); } template void merge(concurrent_multimap& source) { this->internal_merge(source); } template void merge(concurrent_multimap&& source) { this->internal_merge(std::move(source)); } template void merge(concurrent_map& source) { this->internal_merge(source); } template void merge(concurrent_map&& source) { this->internal_merge(std::move(source)); } }; // class concurrent_multimap #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT template >, typename Alloc = tbb::tbb_allocator>, typename = std::enable_if_t>, typename = std::enable_if_t>, typename = std::enable_if_t>> concurrent_multimap( It, It, Comp = Comp(), Alloc = Alloc() ) -> concurrent_multimap, iterator_mapped_t, Comp, Alloc>; template >, typename Alloc = tbb::tbb_allocator>, typename = std::enable_if_t>, typename = std::enable_if_t>> concurrent_multimap( std::initializer_list>, Comp = Comp(), Alloc = Alloc() ) -> concurrent_multimap, T, Comp, Alloc>; template >, typename = std::enable_if_t>> concurrent_multimap( It, It, Alloc ) -> concurrent_multimap, iterator_mapped_t, std::less>, Alloc>; template >> concurrent_multimap( std::initializer_list>, Alloc ) -> concurrent_multimap, T, std::less>, Alloc>; #endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT template void swap( concurrent_multimap& lhs, concurrent_multimap& rhs ) { lhs.swap(rhs); } } // namespace d2 } // namespace detail inline namespace v1 { using detail::d2::concurrent_map; using detail::d2::concurrent_multimap; using detail::split; } // inline namespace v1 } // namespace tbb #endif // __TBB_concurrent_map_H concurrent_priority_queue.h000066400000000000000000000461131514453371700342260ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_concurrent_priority_queue_H #define __TBB_concurrent_priority_queue_H #include "detail/_namespace_injection.h" #include "detail/_aggregator.h" #include "detail/_template_helpers.h" #include "detail/_allocator_traits.h" #include "detail/_range_common.h" #include "detail/_exception.h" #include "detail/_utils.h" #include "detail/_containers_helpers.h" #include "cache_aligned_allocator.h" #include #include #include #include #include #include namespace tbb { namespace detail { namespace d1 { template , typename Allocator = cache_aligned_allocator> class concurrent_priority_queue { public: using value_type = T; using reference = T&; using const_reference = const T&; using size_type = std::size_t; using difference_type = std::ptrdiff_t; using allocator_type = Allocator; concurrent_priority_queue() : concurrent_priority_queue(allocator_type{}) {} explicit concurrent_priority_queue( const allocator_type& alloc ) : mark(0), my_size(0), my_compare(), data(alloc) { my_aggregator.initialize_handler(functor{this}); } explicit concurrent_priority_queue( const Compare& compare, const allocator_type& alloc = allocator_type() ) : mark(0), my_size(0), my_compare(compare), data(alloc) { my_aggregator.initialize_handler(functor{this}); } explicit concurrent_priority_queue( size_type init_capacity, const allocator_type& alloc = allocator_type() ) : mark(0), my_size(0), my_compare(), data(alloc) { data.reserve(init_capacity); my_aggregator.initialize_handler(functor{this}); } explicit concurrent_priority_queue( size_type init_capacity, const Compare& compare, const allocator_type& alloc = 
allocator_type() ) : mark(0), my_size(0), my_compare(compare), data(alloc) { data.reserve(init_capacity); my_aggregator.initialize_handler(functor{this}); } template concurrent_priority_queue( InputIterator begin, InputIterator end, const Compare& compare, const allocator_type& alloc = allocator_type() ) : mark(0), my_compare(compare), data(begin, end, alloc) { my_aggregator.initialize_handler(functor{this}); heapify(); my_size.store(data.size(), std::memory_order_relaxed); } template concurrent_priority_queue( InputIterator begin, InputIterator end, const allocator_type& alloc = allocator_type() ) : concurrent_priority_queue(begin, end, Compare(), alloc) {} concurrent_priority_queue( std::initializer_list init, const Compare& compare, const allocator_type& alloc = allocator_type() ) : concurrent_priority_queue(init.begin(), init.end(), compare, alloc) {} concurrent_priority_queue( std::initializer_list init, const allocator_type& alloc = allocator_type() ) : concurrent_priority_queue(init, Compare(), alloc) {} concurrent_priority_queue( const concurrent_priority_queue& other ) : mark(other.mark), my_size(other.my_size.load(std::memory_order_relaxed)), my_compare(other.my_compare), data(other.data) { my_aggregator.initialize_handler(functor{this}); } concurrent_priority_queue( const concurrent_priority_queue& other, const allocator_type& alloc ) : mark(other.mark), my_size(other.my_size.load(std::memory_order_relaxed)), my_compare(other.my_compare), data(other.data, alloc) { my_aggregator.initialize_handler(functor{this}); } concurrent_priority_queue( concurrent_priority_queue&& other ) : mark(other.mark), my_size(other.my_size.load(std::memory_order_relaxed)), my_compare(other.my_compare), data(std::move(other.data)) { my_aggregator.initialize_handler(functor{this}); } concurrent_priority_queue( concurrent_priority_queue&& other, const allocator_type& alloc ) : mark(other.mark), my_size(other.my_size.load(std::memory_order_relaxed)), my_compare(other.my_compare), 
data(std::move(other.data), alloc) { my_aggregator.initialize_handler(functor{this}); } concurrent_priority_queue& operator=( const concurrent_priority_queue& other ) { if (this != &other) { data = other.data; mark = other.mark; my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); } return *this; } concurrent_priority_queue& operator=( concurrent_priority_queue&& other ) { if (this != &other) { // TODO: check if exceptions from std::vector::operator=(vector&&) should be handled separately data = std::move(other.data); mark = other.mark; my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); } return *this; } concurrent_priority_queue& operator=( std::initializer_list init ) { assign(init.begin(), init.end()); return *this; } template void assign( InputIterator begin, InputIterator end ) { data.assign(begin, end); mark = 0; my_size.store(data.size(), std::memory_order_relaxed); heapify(); } void assign( std::initializer_list init ) { assign(init.begin(), init.end()); } /* Returned value may not reflect results of pending operations. This operation reads shared data and will trigger a race condition. */ __TBB_nodiscard bool empty() const { return size() == 0; } // Returns the current number of elements contained in the queue /* Returned value may not reflect results of pending operations. This operation reads shared data and will trigger a race condition. */ size_type size() const { return my_size.load(std::memory_order_relaxed); } /* This operation can be safely used concurrently with other push, try_pop or emplace operations. */ void push( const value_type& value ) { cpq_operation op_data(value, PUSH_OP); my_aggregator.execute(&op_data); if (op_data.status == FAILED) throw_exception(exception_id::bad_alloc); } /* This operation can be safely used concurrently with other push, try_pop or emplace operations. 
*/ void push( value_type&& value ) { cpq_operation op_data(value, PUSH_RVALUE_OP); my_aggregator.execute(&op_data); if (op_data.status == FAILED) throw_exception(exception_id::bad_alloc); } /* This operation can be safely used concurrently with other push, try_pop or emplace operations. */ template void emplace( Args&&... args ) { // TODO: support uses allocator construction in this place push(value_type(std::forward(args)...)); } // Gets a reference to and removes highest priority element /* If a highest priority element was found, sets elem and returns true, otherwise returns false. This operation can be safely used concurrently with other push, try_pop or emplace operations. */ bool try_pop( value_type& value ) { cpq_operation op_data(value, POP_OP); my_aggregator.execute(&op_data); return op_data.status == SUCCEEDED; } // This operation affects the whole container => it is not thread-safe void clear() { data.clear(); mark = 0; my_size.store(0, std::memory_order_relaxed); } // This operation affects the whole container => it is not thread-safe void swap( concurrent_priority_queue& other ) { if (this != &other) { using std::swap; swap(data, other.data); swap(mark, other.mark); size_type sz = my_size.load(std::memory_order_relaxed); my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); other.my_size.store(sz, std::memory_order_relaxed); } } allocator_type get_allocator() const { return data.get_allocator(); } private: enum operation_type {INVALID_OP, PUSH_OP, POP_OP, PUSH_RVALUE_OP}; enum operation_status {WAIT = 0, SUCCEEDED, FAILED}; class cpq_operation : public aggregated_operation { public: operation_type type; union { value_type* elem; size_type sz; }; cpq_operation( const value_type& value, operation_type t ) : type(t), elem(const_cast(&value)) {} }; // class cpq_operation class functor { concurrent_priority_queue* my_cpq; public: functor() : my_cpq(nullptr) {} functor( concurrent_priority_queue* cpq ) : my_cpq(cpq) {} void 
operator()(cpq_operation* op_list) { __TBB_ASSERT(my_cpq != nullptr, "Invalid functor"); my_cpq->handle_operations(op_list); } }; // class functor void handle_operations( cpq_operation* op_list ) { call_itt_notify(acquired, this); cpq_operation* tmp, *pop_list = nullptr; __TBB_ASSERT(mark == data.size(), NULL); // First pass processes all constant (amortized; reallocation may happen) time pushes and pops. while(op_list) { // ITT note: &(op_list->status) tag is used to cover accesses to op_list // node. This thread is going to handle the operation, and so will acquire it // and perform the associated operation w/o triggering a race condition; the // thread that created the operation is waiting on the status field, so when // this thread is done with the operation, it will perform a // store_with_release to give control back to the waiting thread in // aggregator::insert_operation. // TODO: enable call_itt_notify(acquired, &(op_list->status)); __TBB_ASSERT(op_list->type != INVALID_OP, NULL); tmp = op_list; op_list = op_list->next.load(std::memory_order_relaxed); if (tmp->type == POP_OP) { if (mark < data.size() && my_compare(data[0], data.back())) { // there are newly pushed elems and the last one is higher than top *(tmp->elem) = std::move(data.back()); my_size.store(my_size.load(std::memory_order_relaxed) - 1, std::memory_order_relaxed); tmp->status.store(uintptr_t(SUCCEEDED), std::memory_order_release); data.pop_back(); __TBB_ASSERT(mark <= data.size(), NULL); } else { // no convenient item to pop; postpone tmp->next.store(pop_list, std::memory_order_relaxed); pop_list = tmp; } } else { // PUSH_OP or PUSH_RVALUE_OP __TBB_ASSERT(tmp->type == PUSH_OP || tmp->type == PUSH_RVALUE_OP, "Unknown operation"); #if TBB_USE_EXCEPTIONS try #endif { if (tmp->type == PUSH_OP) { push_back_helper(*(tmp->elem)); } else { data.push_back(std::move(*(tmp->elem))); } my_size.store(my_size.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); 
tmp->status.store(uintptr_t(SUCCEEDED), std::memory_order_release); } #if TBB_USE_EXCEPTIONS catch(...) { tmp->status.store(uintptr_t(FAILED), std::memory_order_release); } #endif } } // Second pass processes pop operations while(pop_list) { tmp = pop_list; pop_list = pop_list->next.load(std::memory_order_relaxed); __TBB_ASSERT(tmp->type == POP_OP, NULL); if (data.empty()) { tmp->status.store(uintptr_t(FAILED), std::memory_order_release); } else { __TBB_ASSERT(mark <= data.size(), NULL); if (mark < data.size() && my_compare(data[0], data.back())) { // there are newly pushed elems and the last one is higher than top *(tmp->elem) = std::move(data.back()); my_size.store(my_size.load(std::memory_order_relaxed) - 1, std::memory_order_relaxed); tmp->status.store(uintptr_t(SUCCEEDED), std::memory_order_release); data.pop_back(); } else { // extract top and push last element down heap *(tmp->elem) = std::move(data[0]); my_size.store(my_size.load(std::memory_order_relaxed) - 1, std::memory_order_relaxed); tmp->status.store(uintptr_t(SUCCEEDED), std::memory_order_release); reheap(); } } } // heapify any leftover pushed elements before doing the next // batch of operations if (mark < data.size()) heapify(); __TBB_ASSERT(mark == data.size(), NULL); call_itt_notify(releasing, this); } // Merge unsorted elements into heap void heapify() { if (!mark && data.size() > 0) mark = 1; for (; mark < data.size(); ++mark) { // for each unheapified element under size size_type cur_pos = mark; value_type to_place = std::move(data[mark]); do { // push to_place up the heap size_type parent = (cur_pos - 1) >> 1; if (!my_compare(data[parent], to_place)) break; data[cur_pos] = std::move(data[parent]); cur_pos = parent; } while(cur_pos); data[cur_pos] = std::move(to_place); } } // Re-heapify after an extraction // Re-heapify by pushing last element down the heap from the root. 
void reheap() { size_type cur_pos = 0, child = 1; while(child < mark) { size_type target = child; if (child + 1 < mark && my_compare(data[child], data[child + 1])) ++target; // target now has the higher priority child if (my_compare(data[target], data.back())) break; data[cur_pos] = std::move(data[target]); cur_pos = target; child = (cur_pos << 1) + 1; } if (cur_pos != data.size() - 1) data[cur_pos] = std::move(data.back()); data.pop_back(); if (mark > data.size()) mark = data.size(); } void push_back_helper( const T& value ) { push_back_helper_impl(value, std::is_copy_constructible{}); } void push_back_helper_impl( const T& value, /*is_copy_constructible = */std::true_type ) { data.push_back(value); } void push_back_helper_impl( const T&, /*is_copy_constructible = */std::false_type ) { __TBB_ASSERT(false, "error: calling tbb::concurrent_priority_queue.push(const value_type&) for move-only type"); } using aggregator_type = aggregator; aggregator_type my_aggregator; // Padding added to avoid false sharing char padding1[max_nfs_size - sizeof(aggregator_type)]; // The point at which unsorted elements begin size_type mark; std::atomic my_size; Compare my_compare; // Padding added to avoid false sharing char padding2[max_nfs_size - (2*sizeof(size_type)) - sizeof(Compare)]; //! Storage for the heap of elements in queue, plus unheapified elements /** data has the following structure: binary unheapified heap elements ____|_______|____ | | | v v v [_|...|_|_|...|_| |...| ] 0 ^ ^ ^ | | |__capacity | |__my_size |__mark Thus, data stores the binary heap starting at position 0 through mark-1 (it may be empty). Then there are 0 or more elements that have not yet been inserted into the heap, in positions mark through my_size-1. 
*/ using vector_type = std::vector; vector_type data; friend bool operator==( const concurrent_priority_queue& lhs, const concurrent_priority_queue& rhs ) { return lhs.data == rhs.data; } #if !__TBB_CPP20_COMPARISONS_PRESENT friend bool operator!=( const concurrent_priority_queue& lhs, const concurrent_priority_queue& rhs ) { return !(lhs == rhs); } #endif }; // class concurrent_priority_queue #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT template >, typename Alloc = tbb::cache_aligned_allocator>, typename = std::enable_if_t>, typename = std::enable_if_t>, typename = std::enable_if_t>> concurrent_priority_queue( It, It, Comp = Comp(), Alloc = Alloc() ) -> concurrent_priority_queue, Comp, Alloc>; template >, typename = std::enable_if_t>> concurrent_priority_queue( It, It, Alloc ) -> concurrent_priority_queue, std::less>, Alloc>; template , typename Alloc = tbb::cache_aligned_allocator, typename = std::enable_if_t>, typename = std::enable_if_t>> concurrent_priority_queue( std::initializer_list, Comp = Comp(), Alloc = Alloc() ) -> concurrent_priority_queue; template >> concurrent_priority_queue( std::initializer_list, Alloc ) -> concurrent_priority_queue, Alloc>; #endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT template void swap( concurrent_priority_queue& lhs, concurrent_priority_queue& rhs ) { lhs.swap(rhs); } } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::concurrent_priority_queue; } // inline namespace v1 } // namespace tbb #endif // __TBB_concurrent_priority_queue_H level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/concurrent_queue.h000066400000000000000000000567361514453371700323600ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_concurrent_queue_H #define __TBB_concurrent_queue_H #include "detail/_namespace_injection.h" #include "detail/_concurrent_queue_base.h" #include "detail/_allocator_traits.h" #include "detail/_exception.h" #include "detail/_containers_helpers.h" #include "cache_aligned_allocator.h" namespace tbb { namespace detail { namespace d2 { // A high-performance thread-safe non-blocking concurrent queue. // Multiple threads may each push and pop concurrently. // Assignment construction is not allowed. template > class concurrent_queue { using allocator_traits_type = tbb::detail::allocator_traits; using queue_representation_type = concurrent_queue_rep; using queue_allocator_type = typename allocator_traits_type::template rebind_alloc; using queue_allocator_traits = tbb::detail::allocator_traits; public: using size_type = std::size_t; using value_type = T; using reference = T&; using const_reference = const T&; using difference_type = std::ptrdiff_t; using allocator_type = Allocator; using pointer = typename allocator_traits_type::pointer; using const_pointer = typename allocator_traits_type::const_pointer; using iterator = concurrent_queue_iterator; using const_iterator = concurrent_queue_iterator; concurrent_queue() : concurrent_queue(allocator_type()) {} explicit concurrent_queue(const allocator_type& a) : my_allocator(a), my_queue_representation(nullptr) { my_queue_representation = static_cast(r1::cache_aligned_allocate(sizeof(queue_representation_type))); queue_allocator_traits::construct(my_allocator, my_queue_representation); 
__TBB_ASSERT(is_aligned(my_queue_representation, max_nfs_size), "alignment error" ); __TBB_ASSERT(is_aligned(&my_queue_representation->head_counter, max_nfs_size), "alignment error" ); __TBB_ASSERT(is_aligned(&my_queue_representation->tail_counter, max_nfs_size), "alignment error" ); __TBB_ASSERT(is_aligned(&my_queue_representation->array, max_nfs_size), "alignment error" ); } template concurrent_queue(InputIterator begin, InputIterator end, const allocator_type& a = allocator_type()) : concurrent_queue(a) { for (; begin != end; ++begin) push(*begin); } concurrent_queue(const concurrent_queue& src, const allocator_type& a) : concurrent_queue(a) { my_queue_representation->assign(*src.my_queue_representation, my_allocator, copy_construct_item); } concurrent_queue(const concurrent_queue& src) : concurrent_queue(queue_allocator_traits::select_on_container_copy_construction(src.get_allocator())) { my_queue_representation->assign(*src.my_queue_representation, my_allocator, copy_construct_item); } // Move constructors concurrent_queue(concurrent_queue&& src) : concurrent_queue(std::move(src.my_allocator)) { internal_swap(src); } concurrent_queue(concurrent_queue&& src, const allocator_type& a) : concurrent_queue(a) { // checking that memory allocated by one instance of allocator can be deallocated // with another if (my_allocator == src.my_allocator) { internal_swap(src); } else { // allocators are different => performing per-element move my_queue_representation->assign(*src.my_queue_representation, my_allocator, move_construct_item); src.clear(); } } // Destroy queue ~concurrent_queue() { clear(); my_queue_representation->clear(my_allocator); queue_allocator_traits::destroy(my_allocator, my_queue_representation); r1::cache_aligned_deallocate(my_queue_representation); } // Enqueue an item at tail of queue. void push(const T& value) { internal_push(value); } void push(T&& value) { internal_push(std::move(value)); } template void emplace( Args&&... 
args ) { internal_push(std::forward(args)...); } // Attempt to dequeue an item from head of queue. /** Does not wait for item to become available. Returns true if successful; false otherwise. */ bool try_pop( T& result ) { return internal_try_pop(&result); } // Return the number of items in the queue; thread unsafe size_type unsafe_size() const { std::ptrdiff_t size = my_queue_representation->size(); return size < 0 ? 0 : size_type(size); } // Equivalent to size()==0. __TBB_nodiscard bool empty() const { return my_queue_representation->empty(); } // Clear the queue. not thread-safe. void clear() { while (!empty()) { T value; try_pop(value); } } // Return allocator object allocator_type get_allocator() const { return my_allocator; } //------------------------------------------------------------------------ // The iterators are intended only for debugging. They are slow and not thread safe. //------------------------------------------------------------------------ iterator unsafe_begin() { return concurrent_queue_iterator_provider::get(*this); } iterator unsafe_end() { return iterator(); } const_iterator unsafe_begin() const { return concurrent_queue_iterator_provider::get(*this); } const_iterator unsafe_end() const { return const_iterator(); } const_iterator unsafe_cbegin() const { return concurrent_queue_iterator_provider::get(*this); } const_iterator unsafe_cend() const { return const_iterator(); } private: void internal_swap(concurrent_queue& src) { using std::swap; swap(my_queue_representation, src.my_queue_representation); } template void internal_push( Args&&... 
args ) { ticket_type k = my_queue_representation->tail_counter++; my_queue_representation->choose(k).push(k, *my_queue_representation, my_allocator, std::forward(args)...); } bool internal_try_pop( void* dst ) { ticket_type k; do { k = my_queue_representation->head_counter.load(std::memory_order_relaxed); do { if (static_cast(my_queue_representation->tail_counter.load(std::memory_order_relaxed) - k) <= 0) { // Queue is empty return false; } // Queue had item with ticket k when we looked. Attempt to get that item. // Another thread snatched the item, retry. } while (!my_queue_representation->head_counter.compare_exchange_strong(k, k + 1)); } while (!my_queue_representation->choose(k).pop(dst, k, *my_queue_representation, my_allocator)); return true; } template friend class concurrent_queue_iterator; static void copy_construct_item(T* location, const void* src) { // TODO: use allocator_traits for copy construction new (location) value_type(*static_cast(src)); // queue_allocator_traits::construct(my_allocator, location, *static_cast(src)); } static void move_construct_item(T* location, const void* src) { // TODO: use allocator_traits for move construction new (location) value_type(std::move(*static_cast(const_cast(src)))); } queue_allocator_type my_allocator; queue_representation_type* my_queue_representation; }; // class concurrent_queue #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT // Deduction guide for the constructor from two iterators template >, typename = std::enable_if_t>, typename = std::enable_if_t>> concurrent_queue( It, It, Alloc = Alloc() ) -> concurrent_queue, Alloc>; #endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */ class concurrent_monitor; // The concurrent monitor tags for concurrent_bounded_queue. 
static constexpr std::size_t cbq_slots_avail_tag = 0; static constexpr std::size_t cbq_items_avail_tag = 1; } // namespace d2 namespace r1 { class concurrent_monitor; TBB_EXPORT std::uint8_t* __TBB_EXPORTED_FUNC allocate_bounded_queue_rep( std::size_t queue_rep_size ); TBB_EXPORT void __TBB_EXPORTED_FUNC deallocate_bounded_queue_rep( std::uint8_t* mem, std::size_t queue_rep_size ); TBB_EXPORT void __TBB_EXPORTED_FUNC abort_bounded_queue_monitors( concurrent_monitor* monitors ); TBB_EXPORT void __TBB_EXPORTED_FUNC notify_bounded_queue_monitor( concurrent_monitor* monitors, std::size_t monitor_tag , std::size_t ticket ); TBB_EXPORT void __TBB_EXPORTED_FUNC wait_bounded_queue_monitor( concurrent_monitor* monitors, std::size_t monitor_tag, std::ptrdiff_t target, d1::delegate_base& predicate ); } // namespace r1 namespace d2 { // A high-performance thread-safe blocking concurrent bounded queue. // Supports boundedness and blocking semantics. // Multiple threads may each push and pop concurrently. // Assignment construction is not allowed. 
template > class concurrent_bounded_queue { using allocator_traits_type = tbb::detail::allocator_traits; using queue_representation_type = concurrent_queue_rep; using queue_allocator_type = typename allocator_traits_type::template rebind_alloc; using queue_allocator_traits = tbb::detail::allocator_traits; template void internal_wait(r1::concurrent_monitor* monitors, std::size_t monitor_tag, std::ptrdiff_t target, FuncType pred) { d1::delegated_function func(pred); r1::wait_bounded_queue_monitor(monitors, monitor_tag, target, func); } public: using size_type = std::ptrdiff_t; using value_type = T; using reference = T&; using const_reference = const T&; using difference_type = std::ptrdiff_t; using allocator_type = Allocator; using pointer = typename allocator_traits_type::pointer; using const_pointer = typename allocator_traits_type::const_pointer; using iterator = concurrent_queue_iterator; using const_iterator = concurrent_queue_iterator ; concurrent_bounded_queue() : concurrent_bounded_queue(allocator_type()) {} explicit concurrent_bounded_queue( const allocator_type& a ) : my_allocator(a), my_capacity(0), my_abort_counter(0), my_queue_representation(nullptr) { my_queue_representation = reinterpret_cast( r1::allocate_bounded_queue_rep(sizeof(queue_representation_type))); my_monitors = reinterpret_cast(my_queue_representation + 1); queue_allocator_traits::construct(my_allocator, my_queue_representation); my_capacity = std::size_t(-1) / (queue_representation_type::item_size > 1 ? 
queue_representation_type::item_size : 2); __TBB_ASSERT(is_aligned(my_queue_representation, max_nfs_size), "alignment error" ); __TBB_ASSERT(is_aligned(&my_queue_representation->head_counter, max_nfs_size), "alignment error" ); __TBB_ASSERT(is_aligned(&my_queue_representation->tail_counter, max_nfs_size), "alignment error" ); __TBB_ASSERT(is_aligned(&my_queue_representation->array, max_nfs_size), "alignment error" ); } template concurrent_bounded_queue( InputIterator begin, InputIterator end, const allocator_type& a = allocator_type() ) : concurrent_bounded_queue(a) { for (; begin != end; ++begin) push(*begin); } concurrent_bounded_queue( const concurrent_bounded_queue& src, const allocator_type& a ) : concurrent_bounded_queue(a) { my_queue_representation->assign(*src.my_queue_representation, my_allocator, copy_construct_item); } concurrent_bounded_queue( const concurrent_bounded_queue& src ) : concurrent_bounded_queue(queue_allocator_traits::select_on_container_copy_construction(src.get_allocator())) { my_queue_representation->assign(*src.my_queue_representation, my_allocator, copy_construct_item); } // Move constructors concurrent_bounded_queue( concurrent_bounded_queue&& src ) : concurrent_bounded_queue(std::move(src.my_allocator)) { internal_swap(src); } concurrent_bounded_queue( concurrent_bounded_queue&& src, const allocator_type& a ) : concurrent_bounded_queue(a) { // checking that memory allocated by one instance of allocator can be deallocated // with another if (my_allocator == src.my_allocator) { internal_swap(src); } else { // allocators are different => performing per-element move my_queue_representation->assign(*src.my_queue_representation, my_allocator, move_construct_item); src.clear(); } } // Destroy queue ~concurrent_bounded_queue() { clear(); my_queue_representation->clear(my_allocator); queue_allocator_traits::destroy(my_allocator, my_queue_representation); r1::deallocate_bounded_queue_rep(reinterpret_cast(my_queue_representation), 
sizeof(queue_representation_type)); } // Enqueue an item at tail of queue. void push( const T& value ) { internal_push(value); } void push( T&& value ) { internal_push(std::move(value)); } // Enqueue an item at tail of queue if queue is not already full. // Does not wait for queue to become not full. // Returns true if item is pushed; false if queue was already full. bool try_push( const T& value ) { return internal_push_if_not_full(value); } bool try_push( T&& value ) { return internal_push_if_not_full(std::move(value)); } template void emplace( Args&&... args ) { internal_push(std::forward(args)...); } template bool try_emplace( Args&&... args ) { return internal_push_if_not_full(std::forward(args)...); } // Attempt to dequeue an item from head of queue. /** Does not wait for item to become available. Returns true if successful; false otherwise. */ bool pop( T& result ) { return internal_pop(&result); } bool try_pop( T& result ) { return internal_pop_if_present(&result); } void abort() { internal_abort(); } // Return the number of items in the queue; thread unsafe std::ptrdiff_t size() const { return my_queue_representation->size(); } void set_capacity( size_type new_capacity ) { std::ptrdiff_t c = new_capacity < 0 ? infinite_capacity : new_capacity; my_capacity = c; } size_type capacity() const { return my_capacity; } // Equivalent to size()==0. __TBB_nodiscard bool empty() const { return my_queue_representation->empty(); } // Clear the queue. not thread-safe. void clear() { while (!empty()) { T value; try_pop(value); } } // Return allocator object allocator_type get_allocator() const { return my_allocator; } //------------------------------------------------------------------------ // The iterators are intended only for debugging. They are slow and not thread safe. 
//------------------------------------------------------------------------ iterator unsafe_begin() { return concurrent_queue_iterator_provider::get(*this); } iterator unsafe_end() { return iterator(); } const_iterator unsafe_begin() const { return concurrent_queue_iterator_provider::get(*this); } const_iterator unsafe_end() const { return const_iterator(); } const_iterator unsafe_cbegin() const { return concurrent_queue_iterator_provider::get(*this); } const_iterator unsafe_cend() const { return const_iterator(); } private: void internal_swap( concurrent_bounded_queue& src ) { std::swap(my_queue_representation, src.my_queue_representation); std::swap(my_monitors, src.my_monitors); } static constexpr std::ptrdiff_t infinite_capacity = std::ptrdiff_t(~size_type(0) / 2); template void internal_push( Args&&... args ) { unsigned old_abort_counter = my_abort_counter.load(std::memory_order_relaxed); ticket_type ticket = my_queue_representation->tail_counter++; std::ptrdiff_t target = ticket - my_capacity; if (static_cast(my_queue_representation->head_counter.load(std::memory_order_relaxed)) <= target) { // queue is full auto pred = [&] { if (my_abort_counter.load(std::memory_order_relaxed) != old_abort_counter) { throw_exception(exception_id::user_abort); } return static_cast(my_queue_representation->head_counter.load(std::memory_order_relaxed)) <= target; }; try_call( [&] { internal_wait(my_monitors, cbq_slots_avail_tag, target, pred); }).on_exception( [&] { my_queue_representation->choose(ticket).abort_push(ticket, *my_queue_representation, my_allocator); }); } __TBB_ASSERT((static_cast(my_queue_representation->head_counter.load(std::memory_order_relaxed)) > target), nullptr); my_queue_representation->choose(ticket).push(ticket, *my_queue_representation, my_allocator, std::forward(args)...); r1::notify_bounded_queue_monitor(my_monitors, cbq_items_avail_tag, ticket); } template bool internal_push_if_not_full( Args&&... 
args ) { ticket_type ticket = my_queue_representation->tail_counter.load(std::memory_order_relaxed); do { if (static_cast(ticket - my_queue_representation->head_counter.load(std::memory_order_relaxed)) >= my_capacity) { // Queue is full return false; } // Queue had empty slot with ticket k when we looked. Attempt to claim that slot. // Another thread claimed the slot, so retry. } while (!my_queue_representation->tail_counter.compare_exchange_strong(ticket, ticket + 1)); my_queue_representation->choose(ticket).push(ticket, *my_queue_representation, my_allocator, std::forward(args)...); r1::notify_bounded_queue_monitor(my_monitors, cbq_items_avail_tag, ticket); return true; } bool internal_pop( void* dst ) { std::ptrdiff_t target; // This loop is a single pop operation; abort_counter should not be re-read inside unsigned old_abort_counter = my_abort_counter.load(std::memory_order_relaxed); do { target = my_queue_representation->head_counter++; if (static_cast(my_queue_representation->tail_counter.load(std::memory_order_relaxed)) <= target) { auto pred = [&] { if (my_abort_counter.load(std::memory_order_relaxed) != old_abort_counter) { throw_exception(exception_id::user_abort); } return static_cast(my_queue_representation->tail_counter.load(std::memory_order_relaxed)) <= target; }; try_call( [&] { internal_wait(my_monitors, cbq_items_avail_tag, target, pred); }).on_exception( [&] { my_queue_representation->head_counter--; }); } __TBB_ASSERT(static_cast(my_queue_representation->tail_counter.load(std::memory_order_relaxed)) > target, nullptr); } while (!my_queue_representation->choose(target).pop(dst, target, *my_queue_representation, my_allocator)); r1::notify_bounded_queue_monitor(my_monitors, cbq_slots_avail_tag, target); return true; } bool internal_pop_if_present( void* dst ) { ticket_type ticket; do { ticket = my_queue_representation->head_counter.load(std::memory_order_relaxed); do { if 
(static_cast(my_queue_representation->tail_counter.load(std::memory_order_relaxed) - ticket) <= 0) { // queue is empty // Queue is empty return false; } // Queue had item with ticket k when we looked. Attempt to get that item. // Another thread snatched the item, retry. } while (!my_queue_representation->head_counter.compare_exchange_strong(ticket, ticket + 1)); } while (!my_queue_representation->choose(ticket).pop(dst, ticket, *my_queue_representation, my_allocator)); r1::notify_bounded_queue_monitor(my_monitors, cbq_slots_avail_tag, ticket); return true; } void internal_abort() { ++my_abort_counter; r1::abort_bounded_queue_monitors(my_monitors); } static void copy_construct_item(T* location, const void* src) { // TODO: use allocator_traits for copy construction new (location) value_type(*static_cast(src)); } static void move_construct_item(T* location, const void* src) { // TODO: use allocator_traits for move construction new (location) value_type(std::move(*static_cast(const_cast(src)))); } template friend class concurrent_queue_iterator; queue_allocator_type my_allocator; std::ptrdiff_t my_capacity; std::atomic my_abort_counter; queue_representation_type* my_queue_representation; r1::concurrent_monitor* my_monitors; }; // class concurrent_bounded_queue #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT // Deduction guide for the constructor from two iterators template >> concurrent_bounded_queue( It, It, Alloc = Alloc() ) -> concurrent_bounded_queue, Alloc>; #endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */ } //namespace d2 } // namesapce detail inline namespace v1 { using detail::d2::concurrent_queue; using detail::d2::concurrent_bounded_queue; using detail::r1::user_abort; using detail::r1::bad_last_alloc; } // inline namespace v1 } // namespace tbb #endif // __TBB_concurrent_queue_H level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/concurrent_set.h000066400000000000000000000243261514453371700320150ustar00rootroot00000000000000/* Copyright 
(c) 2019-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_concurrent_set_H #define __TBB_concurrent_set_H #include "detail/_namespace_injection.h" #include "detail/_concurrent_skip_list.h" #include "tbb_allocator.h" #include #include namespace tbb { namespace detail { namespace d2 { template struct set_traits { static constexpr std::size_t max_level = RandomGenerator::max_level; using random_level_generator_type = RandomGenerator; using key_type = Key; using value_type = key_type; using compare_type = KeyCompare; using value_compare = compare_type; using reference = value_type&; using const_reference = const value_type&; using allocator_type = Allocator; static constexpr bool allow_multimapping = AllowMultimapping; static const key_type& get_key(const_reference val) { return val; } static value_compare value_comp(compare_type comp) { return comp; } }; // struct set_traits template class concurrent_multiset; template , typename Allocator = tbb::tbb_allocator> class concurrent_set : public concurrent_skip_list, Allocator, false>> { using base_type = concurrent_skip_list, Allocator, false>>; public: using key_type = Key; using value_type = typename base_type::value_type; using size_type = typename base_type::size_type; using difference_type = typename base_type::difference_type; using key_compare = Compare; using value_compare = typename base_type::value_compare; using allocator_type = Allocator; using reference = typename base_type::reference; using const_reference = 
typename base_type::const_reference; using pointer = typename base_type::pointer; using const_pointer = typename base_type::const_pointer; using iterator = typename base_type::iterator; using const_iterator = typename base_type::const_iterator; using node_type = typename base_type::node_type; // Include constructors of base_type using base_type::base_type; // Required for implicit deduction guides concurrent_set() = default; concurrent_set( const concurrent_set& ) = default; concurrent_set( const concurrent_set& other, const allocator_type& alloc ) : base_type(other, alloc) {} concurrent_set( concurrent_set&& ) = default; concurrent_set( concurrent_set&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} // Required to respect the rule of 5 concurrent_set& operator=( const concurrent_set& ) = default; concurrent_set& operator=( concurrent_set&& ) = default; concurrent_set& operator=( std::initializer_list il ) { base_type::operator= (il); return *this; } template void merge(concurrent_set& source) { this->internal_merge(source); } template void merge(concurrent_set&& source) { this->internal_merge(std::move(source)); } template void merge(concurrent_multiset& source) { this->internal_merge(source); } template void merge(concurrent_multiset&& source) { this->internal_merge(std::move(source)); } }; // class concurrent_set #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT template >, typename Alloc = tbb::tbb_allocator>, typename = std::enable_if_t>, typename = std::enable_if_t>, typename = std::enable_if_t>> concurrent_set( It, It, Comp = Comp(), Alloc = Alloc() ) -> concurrent_set, Comp, Alloc>; template , typename Alloc = tbb::tbb_allocator, typename = std::enable_if_t>, typename = std::enable_if_t>> concurrent_set( std::initializer_list, Comp = Comp(), Alloc = Alloc() ) -> concurrent_set; template >, typename = std::enable_if_t>> concurrent_set( It, It, Alloc ) -> concurrent_set, std::less>, Alloc>; template >> concurrent_set( 
std::initializer_list, Alloc ) -> concurrent_set, Alloc>; #endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT template void swap( concurrent_set& lhs, concurrent_set& rhs ) { lhs.swap(rhs); } template , typename Allocator = tbb::tbb_allocator> class concurrent_multiset : public concurrent_skip_list, Allocator, true>> { using base_type = concurrent_skip_list, Allocator, true>>; public: using key_type = Key; using value_type = typename base_type::value_type; using size_type = typename base_type::size_type; using difference_type = typename base_type::difference_type; using key_compare = Compare; using value_compare = typename base_type::value_compare; using allocator_type = Allocator; using reference = typename base_type::reference; using const_reference = typename base_type::const_reference; using pointer = typename base_type::pointer; using const_pointer = typename base_type::const_pointer; using iterator = typename base_type::iterator; using const_iterator = typename base_type::const_iterator; using node_type = typename base_type::node_type; // Include constructors of base_type; using base_type::base_type; // Required for implicit deduction guides concurrent_multiset() = default; concurrent_multiset( const concurrent_multiset& ) = default; concurrent_multiset( const concurrent_multiset& other, const allocator_type& alloc ) : base_type(other, alloc) {} concurrent_multiset( concurrent_multiset&& ) = default; concurrent_multiset( concurrent_multiset&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} // Required to respect the rule of 5 concurrent_multiset& operator=( const concurrent_multiset& ) = default; concurrent_multiset& operator=( concurrent_multiset&& ) = default; concurrent_multiset& operator=( std::initializer_list il ) { base_type::operator= (il); return *this; } template void merge(concurrent_set& source) { this->internal_merge(source); } template void merge(concurrent_set&& source) { this->internal_merge(std::move(source)); } 
template void merge(concurrent_multiset& source) { this->internal_merge(source); } template void merge(concurrent_multiset&& source) { this->internal_merge(std::move(source)); } }; // class concurrent_multiset #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT template >, typename Alloc = tbb::tbb_allocator>, typename = std::enable_if_t>, typename = std::enable_if_t>, typename = std::enable_if_t>> concurrent_multiset( It, It, Comp = Comp(), Alloc = Alloc() ) -> concurrent_multiset, Comp, Alloc>; template , typename Alloc = tbb::tbb_allocator, typename = std::enable_if_t>, typename = std::enable_if_t>> concurrent_multiset( std::initializer_list, Comp = Comp(), Alloc = Alloc() ) -> concurrent_multiset; template >, typename = std::enable_if_t>> concurrent_multiset( It, It, Alloc ) -> concurrent_multiset, std::less>, Alloc>; template >> concurrent_multiset( std::initializer_list, Alloc ) -> concurrent_multiset, Alloc>; #endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT template void swap( concurrent_multiset& lhs, concurrent_multiset& rhs ) { lhs.swap(rhs); } } // namespace d2 } // namespace detail inline namespace v1 { using detail::d2::concurrent_set; using detail::d2::concurrent_multiset; using detail::split; } // inline namespace v1 } // namespace tbb #endif // __TBB_concurrent_set_H concurrent_unordered_map.h000066400000000000000000000447621514453371700337750ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_concurrent_unordered_map_H #define __TBB_concurrent_unordered_map_H #include "detail/_namespace_injection.h" #include "detail/_concurrent_unordered_base.h" #include "tbb_allocator.h" #include namespace tbb { namespace detail { namespace d1 { template struct concurrent_unordered_map_traits { using value_type = std::pair; using key_type = Key; using allocator_type = Allocator; using hash_compare_type = hash_compare; static constexpr bool allow_multimapping = AllowMultimapping; static constexpr const key_type& get_key( const value_type& value ) { return value.first; } }; // struct concurrent_unordered_map_traits template class concurrent_unordered_multimap; template , typename KeyEqual = std::equal_to, typename Allocator = tbb::tbb_allocator> > class concurrent_unordered_map : public concurrent_unordered_base> { using traits_type = concurrent_unordered_map_traits; using base_type = concurrent_unordered_base; public: using key_type = typename base_type::key_type; using mapped_type = T; using value_type = typename base_type::value_type; using size_type = typename base_type::size_type; using difference_type = typename base_type::difference_type; using hasher = typename base_type::hasher; using key_equal = typename base_type::key_equal; using allocator_type = typename base_type::allocator_type; using reference = typename base_type::reference; using const_reference = typename base_type::const_reference; using pointer = typename base_type::pointer; using const_pointer = typename base_type::const_pointer; using iterator = typename base_type::iterator; using const_iterator = typename base_type::const_iterator; using local_iterator = typename base_type::local_iterator; using const_local_iterator = typename base_type::const_local_iterator; using node_type = typename base_type::node_type; // Include constructors of base type using base_type::base_type; // Required 
for implicit deduction guides concurrent_unordered_map() = default; concurrent_unordered_map( const concurrent_unordered_map& ) = default; concurrent_unordered_map( const concurrent_unordered_map& other, const allocator_type& alloc ) : base_type(other, alloc) {} concurrent_unordered_map( concurrent_unordered_map&& ) = default; concurrent_unordered_map( concurrent_unordered_map&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} // Required to respect the rule of 5 concurrent_unordered_map& operator=( const concurrent_unordered_map& ) = default; concurrent_unordered_map& operator=( concurrent_unordered_map&& ) = default; concurrent_unordered_map& operator=( std::initializer_list il ) { base_type::operator= (il); return *this; } // Observers mapped_type& operator[]( const key_type& key ) { iterator where = this->find(key); if (where == this->end()) { where = this->emplace(std::piecewise_construct, std::forward_as_tuple(key), std::tuple<>()).first; } return where->second; } mapped_type& operator[]( key_type&& key ) { iterator where = this->find(key); if (where == this->end()) { where = this->emplace(std::piecewise_construct, std::forward_as_tuple(std::move(key)), std::tuple<>()).first; } return where->second; } mapped_type& at( const key_type& key ) { iterator where = this->find(key); if (where == this->end()) { throw_exception(exception_id::invalid_key); } return where->second; } const mapped_type& at( const key_type& key ) const { const_iterator where = this->find(key); if (where == this->end()) { throw_exception(exception_id::out_of_range); } return where->second; } using base_type::insert; template typename std::enable_if::value, std::pair>::type insert( P&& value ) { return this->emplace(std::forward

(value)); } template typename std::enable_if::value, iterator>::type insert( const_iterator hint, P&& value ) { return this->emplace_hint(hint, std::forward

(value)); } template void merge( concurrent_unordered_map& source ) { this->internal_merge(source); } template void merge( concurrent_unordered_map&& source ) { this->internal_merge(std::move(source)); } template void merge( concurrent_unordered_multimap& source ) { this->internal_merge(source); } template void merge( concurrent_unordered_multimap&& source ) { this->internal_merge(std::move(source)); } }; // class concurrent_unordered_map #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT template >, typename KeyEq = std::equal_to>, typename Alloc = tbb::tbb_allocator>, typename = std::enable_if_t>, typename = std::enable_if_t>, typename = std::enable_if_t>, typename = std::enable_if_t>, typename = std::enable_if_t>> concurrent_unordered_map( It, It, std::size_t = {}, Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) -> concurrent_unordered_map, iterator_mapped_t, Hash, KeyEq, Alloc>; template >, typename KeyEq = std::equal_to>, typename Alloc = tbb::tbb_allocator>, typename = std::enable_if_t>, typename = std::enable_if_t>, typename = std::enable_if_t>, typename = std::enable_if_t>> concurrent_unordered_map( std::initializer_list>, std::size_t = {}, Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) -> concurrent_unordered_map, T, Hash, KeyEq, Alloc>; template >, typename = std::enable_if_t>> concurrent_unordered_map( It, It, std::size_t, Alloc ) -> concurrent_unordered_map, iterator_mapped_t, std::hash>, std::equal_to>, Alloc>; // TODO: investigate if a deduction guide for concurrent_unordered_map(It, It, Alloc) is needed template >, typename = std::enable_if_t>, typename = std::enable_if_t>, typename = std::enable_if_t>> concurrent_unordered_map( It, It, std::size_t, Hash, Alloc ) -> concurrent_unordered_map, iterator_mapped_t, Hash, std::equal_to>, Alloc>; template >> concurrent_unordered_map( std::initializer_list>, std::size_t, Alloc ) -> concurrent_unordered_map, T, std::hash>, std::equal_to>, Alloc>; template >> concurrent_unordered_map( std::initializer_list>, Alloc 
) -> concurrent_unordered_map, T, std::hash>, std::equal_to>, Alloc>; template >, typename = std::enable_if_t>, typename = std::enable_if_t>> concurrent_unordered_map( std::initializer_list>, std::size_t, Hash, Alloc ) -> concurrent_unordered_map, T, Hash, std::equal_to>, Alloc>; #endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT template void swap( concurrent_unordered_map& lhs, concurrent_unordered_map& rhs ) { lhs.swap(rhs); } template , typename KeyEqual = std::equal_to, typename Allocator = tbb::tbb_allocator> > class concurrent_unordered_multimap : public concurrent_unordered_base> { using traits_type = concurrent_unordered_map_traits; using base_type = concurrent_unordered_base; public: using key_type = typename base_type::key_type; using mapped_type = T; using value_type = typename base_type::value_type; using size_type = typename base_type::size_type; using difference_type = typename base_type::difference_type; using hasher = typename base_type::hasher; using key_equal = typename base_type::key_equal; using allocator_type = typename base_type::allocator_type; using reference = typename base_type::reference; using const_reference = typename base_type::const_reference; using pointer = typename base_type::pointer; using const_pointer = typename base_type::const_pointer; using iterator = typename base_type::iterator; using const_iterator = typename base_type::const_iterator; using local_iterator = typename base_type::local_iterator; using const_local_iterator = typename base_type::const_local_iterator; using node_type = typename base_type::node_type; // Include constructors of base type using base_type::base_type; using base_type::insert; // Required for implicit deduction guides concurrent_unordered_multimap() = default; concurrent_unordered_multimap( const concurrent_unordered_multimap& ) = default; concurrent_unordered_multimap( const concurrent_unordered_multimap& other, const allocator_type& alloc ) : base_type(other, alloc) {} concurrent_unordered_multimap( 
concurrent_unordered_multimap&& ) = default; concurrent_unordered_multimap( concurrent_unordered_multimap&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} // Required to respect the rule of 5 concurrent_unordered_multimap& operator=( const concurrent_unordered_multimap& ) = default; concurrent_unordered_multimap& operator=( concurrent_unordered_multimap&& ) = default; concurrent_unordered_multimap& operator=( std::initializer_list il ) { base_type::operator= (il); return *this; } template typename std::enable_if::value, std::pair>::type insert( P&& value ) { return this->emplace(std::forward

(value)); } template typename std::enable_if::value, iterator>::type insert( const_iterator hint, P&& value ) { return this->emplace_hint(hint, std::forward(value)); } template void merge( concurrent_unordered_map& source ) { this->internal_merge(source); } template void merge( concurrent_unordered_map&& source ) { this->internal_merge(std::move(source)); } template void merge( concurrent_unordered_multimap& source ) { this->internal_merge(source); } template void merge( concurrent_unordered_multimap&& source ) { this->internal_merge(std::move(source)); } }; // class concurrent_unordered_multimap #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT template >, typename KeyEq = std::equal_to>, typename Alloc = tbb::tbb_allocator>, typename = std::enable_if_t>, typename = std::enable_if_t>, typename = std::enable_if_t>, typename = std::enable_if_t>, typename = std::enable_if_t>> concurrent_unordered_multimap( It, It, std::size_t = {}, Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) -> concurrent_unordered_multimap, iterator_mapped_t, Hash, KeyEq, Alloc>; template >, typename KeyEq = std::equal_to>, typename Alloc = tbb::tbb_allocator>, typename = std::enable_if_t>, typename = std::enable_if_t>, typename = std::enable_if_t>, typename = std::enable_if_t>> concurrent_unordered_multimap( std::initializer_list>, std::size_t = {}, Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) -> concurrent_unordered_multimap, T, Hash, KeyEq, Alloc>; template >, typename = std::enable_if_t>> concurrent_unordered_multimap( It, It, std::size_t, Alloc ) -> concurrent_unordered_multimap, iterator_mapped_t, std::hash>, std::equal_to>, Alloc>; template >, typename = std::enable_if_t>, typename = std::enable_if_t>, typename = std::enable_if_t>> concurrent_unordered_multimap( It, It, std::size_t, Hash, Alloc ) -> concurrent_unordered_multimap, iterator_mapped_t, Hash, std::equal_to>, Alloc>; template >> concurrent_unordered_multimap( std::initializer_list>, std::size_t, Alloc ) -> 
concurrent_unordered_multimap, T, std::hash>, std::equal_to>, Alloc>; template >> concurrent_unordered_multimap( std::initializer_list>, Alloc ) -> concurrent_unordered_multimap, T, std::hash>, std::equal_to>, Alloc>; template >, typename = std::enable_if_t>, typename = std::enable_if_t>> concurrent_unordered_multimap( std::initializer_list>, std::size_t, Hash, Alloc ) -> concurrent_unordered_multimap, T, Hash, std::equal_to>, Alloc>; #endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT template void swap( concurrent_unordered_multimap& lhs, concurrent_unordered_multimap& rhs ) { lhs.swap(rhs); } } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::concurrent_unordered_map; using detail::d1::concurrent_unordered_multimap; using detail::split; } // inline namespace v1 } // namespace tbb #endif // __TBB_concurrent_unordered_map_H concurrent_unordered_set.h000066400000000000000000000353541514453371700340100ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #ifndef __TBB_concurrent_unordered_set_H #define __TBB_concurrent_unordered_set_H #include "detail/_namespace_injection.h" #include "detail/_concurrent_unordered_base.h" #include "tbb_allocator.h" namespace tbb { namespace detail { namespace d1 { template struct concurrent_unordered_set_traits { using key_type = Key; using value_type = key_type; using allocator_type = Allocator; using hash_compare_type = hash_compare; static constexpr bool allow_multimapping = AllowMultimapping; static constexpr const key_type& get_key( const value_type& value ) { return value; } }; // class concurrent_unordered_set_traits template class concurrent_unordered_multiset; template , typename KeyEqual = std::equal_to, typename Allocator = tbb::tbb_allocator> class concurrent_unordered_set : public concurrent_unordered_base> { using traits_type = concurrent_unordered_set_traits; using base_type = concurrent_unordered_base; public: using key_type = typename base_type::key_type; using value_type = typename base_type::value_type; using size_type = typename base_type::size_type; using difference_type = typename base_type::difference_type; using hasher = typename base_type::hasher; using key_equal = typename base_type::key_equal; using allocator_type = typename base_type::allocator_type; using reference = typename base_type::reference; using const_reference = typename base_type::const_reference; using pointer = typename base_type::pointer; using const_pointer = typename base_type::const_pointer; using iterator = typename base_type::iterator; using const_iterator = typename base_type::const_iterator; using local_iterator = typename base_type::local_iterator; using const_local_iterator = typename base_type::const_local_iterator; using node_type = typename base_type::node_type; // Include constructors of base_type; using base_type::base_type; // Required for implicit deduction guides concurrent_unordered_set() = default; concurrent_unordered_set( const concurrent_unordered_set& ) = default; 
concurrent_unordered_set( const concurrent_unordered_set& other, const allocator_type& alloc ) : base_type(other, alloc) {} concurrent_unordered_set( concurrent_unordered_set&& ) = default; concurrent_unordered_set( concurrent_unordered_set&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} // Required to respect the rule of 5 concurrent_unordered_set& operator=( const concurrent_unordered_set& ) = default; concurrent_unordered_set& operator=( concurrent_unordered_set&& ) = default; concurrent_unordered_set& operator=( std::initializer_list il ) { base_type::operator= (il); return *this; } template void merge( concurrent_unordered_set& source ) { this->internal_merge(source); } template void merge( concurrent_unordered_set&& source ) { this->internal_merge(std::move(source)); } template void merge( concurrent_unordered_multiset& source ) { this->internal_merge(source); } template void merge( concurrent_unordered_multiset&& source ) { this->internal_merge(std::move(source)); } }; // class concurrent_unordered_set #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT template >, typename KeyEq = std::equal_to>, typename Alloc = tbb::tbb_allocator>, typename = std::enable_if_t>, typename = std::enable_if_t>, typename = std::enable_if_t>, typename = std::enable_if_t>, typename = std::enable_if_t>> concurrent_unordered_set( It, It, std::size_t = {}, Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) -> concurrent_unordered_set, Hash, KeyEq, Alloc>; template , typename KeyEq = std::equal_to, typename Alloc = tbb::tbb_allocator, typename = std::enable_if_t>, typename = std::enable_if_t>, typename = std::enable_if_t>, typename = std::enable_if_t>> concurrent_unordered_set( std::initializer_list, std::size_t = {}, Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) -> concurrent_unordered_set; template >, typename = std::enable_if_t>> concurrent_unordered_set( It, It, std::size_t, Alloc ) -> concurrent_unordered_set, std::hash>, std::equal_to>, Alloc>; template 
>, typename = std::enable_if_t>, typename = std::enable_if_t>, typename = std::enable_if_t>> concurrent_unordered_set( It, It, std::size_t, Hash, Alloc ) -> concurrent_unordered_set, Hash, std::equal_to>, Alloc>; template >> concurrent_unordered_set( std::initializer_list, std::size_t, Alloc ) -> concurrent_unordered_set, std::equal_to, Alloc>; template >> concurrent_unordered_set( std::initializer_list, Alloc ) -> concurrent_unordered_set, std::equal_to, Alloc>; template >, typename = std::enable_if_t>, typename = std::enable_if_t>> concurrent_unordered_set( std::initializer_list, std::size_t, Hash, Alloc ) -> concurrent_unordered_set, Alloc>; #endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT template void swap( concurrent_unordered_set& lhs, concurrent_unordered_set& rhs ) { lhs.swap(rhs); } template , typename KeyEqual = std::equal_to, typename Allocator = tbb::tbb_allocator> class concurrent_unordered_multiset : public concurrent_unordered_base> { using traits_type = concurrent_unordered_set_traits; using base_type = concurrent_unordered_base; public: using key_type = typename base_type::key_type; using value_type = typename base_type::value_type; using size_type = typename base_type::size_type; using difference_type = typename base_type::difference_type; using hasher = typename base_type::hasher; using key_equal = typename base_type::key_equal; using allocator_type = typename base_type::allocator_type; using reference = typename base_type::reference; using const_reference = typename base_type::const_reference; using pointer = typename base_type::pointer; using const_pointer = typename base_type::const_pointer; using iterator = typename base_type::iterator; using const_iterator = typename base_type::const_iterator; using local_iterator = typename base_type::local_iterator; using const_local_iterator = typename base_type::const_local_iterator; using node_type = typename base_type::node_type; // Include constructors of base_type; using base_type::base_type; // 
Required for implicit deduction guides concurrent_unordered_multiset() = default; concurrent_unordered_multiset( const concurrent_unordered_multiset& ) = default; concurrent_unordered_multiset( const concurrent_unordered_multiset& other, const allocator_type& alloc ) : base_type(other, alloc) {} concurrent_unordered_multiset( concurrent_unordered_multiset&& ) = default; concurrent_unordered_multiset( concurrent_unordered_multiset&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} // Required to respect the rule of 5 concurrent_unordered_multiset& operator=( const concurrent_unordered_multiset& ) = default; concurrent_unordered_multiset& operator=( concurrent_unordered_multiset&& ) = default; concurrent_unordered_multiset& operator=( std::initializer_list il ) { base_type::operator= (il); return *this; } template void merge( concurrent_unordered_set& source ) { this->internal_merge(source); } template void merge( concurrent_unordered_set&& source ) { this->internal_merge(std::move(source)); } template void merge( concurrent_unordered_multiset& source ) { this->internal_merge(source); } template void merge( concurrent_unordered_multiset&& source ) { this->internal_merge(std::move(source)); } }; // class concurrent_unordered_multiset #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT template >, typename KeyEq = std::equal_to>, typename Alloc = tbb::tbb_allocator>, typename = std::enable_if_t>, typename = std::enable_if_t>, typename = std::enable_if_t>, typename = std::enable_if_t>, typename = std::enable_if_t>> concurrent_unordered_multiset( It, It, std::size_t = {}, Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) -> concurrent_unordered_multiset, Hash, KeyEq, Alloc>; template , typename KeyEq = std::equal_to, typename Alloc = tbb::tbb_allocator, typename = std::enable_if_t>, typename = std::enable_if_t>, typename = std::enable_if_t>, typename = std::enable_if_t>> concurrent_unordered_multiset( std::initializer_list, std::size_t = {}, Hash = 
Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) -> concurrent_unordered_multiset; template >, typename = std::enable_if_t>> concurrent_unordered_multiset( It, It, std::size_t, Alloc ) -> concurrent_unordered_multiset, std::hash>, std::equal_to>, Alloc>; template >, typename = std::enable_if_t>, typename = std::enable_if_t>, typename = std::enable_if_t>> concurrent_unordered_multiset( It, It, std::size_t, Hash, Alloc ) -> concurrent_unordered_multiset, Hash, std::equal_to>, Alloc>; template >> concurrent_unordered_multiset( std::initializer_list, std::size_t, Alloc ) -> concurrent_unordered_multiset, std::equal_to, Alloc>; template >> concurrent_unordered_multiset( std::initializer_list, Alloc ) -> concurrent_unordered_multiset, std::equal_to, Alloc>; template >, typename = std::enable_if_t>, typename = std::enable_if_t>> concurrent_unordered_multiset( std::initializer_list, std::size_t, Hash, Alloc ) -> concurrent_unordered_multiset, Alloc>; #endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT template void swap( concurrent_unordered_multiset& lhs, concurrent_unordered_multiset& rhs ) { lhs.swap(rhs); } } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::concurrent_unordered_set; using detail::d1::concurrent_unordered_multiset; using detail::split; } // inline namespace v1 } // namespace tbb #endif // __TBB_concurrent_unordered_set_H level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/concurrent_vector.h000066400000000000000000001333031514453371700325200ustar00rootroot00000000000000/* Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_concurrent_vector_H #define __TBB_concurrent_vector_H #include "detail/_namespace_injection.h" #include "detail/_utils.h" #include "detail/_assert.h" #include "detail/_allocator_traits.h" #include "detail/_segment_table.h" #include "detail/_containers_helpers.h" #include "blocked_range.h" #include "cache_aligned_allocator.h" #include #include // std::move_if_noexcept #include #if __TBB_CPP20_COMPARISONS_PRESENT #include #endif namespace tbb { namespace detail { namespace d1 { template class vector_iterator { using vector_type = Vector; public: using value_type = Value; using size_type = typename vector_type::size_type; using difference_type = typename vector_type::difference_type; using pointer = value_type*; using reference = value_type&; using iterator_category = std::random_access_iterator_tag; template friend vector_iterator operator+( typename vector_iterator::difference_type, const vector_iterator& ); template friend typename vector_iterator::difference_type operator-( const vector_iterator&, const vector_iterator& ); template friend bool operator==( const vector_iterator&, const vector_iterator& ); template friend bool operator<( const vector_iterator&, const vector_iterator& ); template friend class vector_iterator; template friend class concurrent_vector; private: vector_iterator( const vector_type& vector, size_type index, value_type* item = nullptr ) : my_vector(const_cast(&vector)), my_index(index), my_item(item) {} public: vector_iterator() : my_vector(nullptr), my_index(~size_type(0)), my_item(nullptr) {} vector_iterator( const vector_iterator& other ) 
: my_vector(other.my_vector), my_index(other.my_index), my_item(other.my_item) {} vector_iterator& operator=( const vector_iterator& other ) { my_vector = other.my_vector; my_index = other.my_index; my_item = other.my_item; return *this; } vector_iterator operator+( difference_type offset ) const { return vector_iterator(*my_vector, my_index + offset); } vector_iterator& operator+=( difference_type offset ) { my_index += offset; my_item = nullptr; return *this; } vector_iterator operator-( difference_type offset ) const { return vector_iterator(*my_vector, my_index - offset); } vector_iterator& operator-=( difference_type offset ) { my_index -= offset; my_item = nullptr; return *this; } reference operator*() const { value_type *item = my_item; if (item == nullptr) { item = &my_vector->internal_subscript(my_index); } else { __TBB_ASSERT(item == &my_vector->internal_subscript(my_index), "corrupt cache"); } return *item; } pointer operator->() const { return &(operator*()); } reference operator[]( difference_type k ) const { return my_vector->internal_subscript(my_index + k); } vector_iterator& operator++() { ++my_index; if (my_item != nullptr) { if (vector_type::is_first_element_in_segment(my_index)) { // If the iterator crosses a segment boundary, the pointer become invalid // as possibly next segment is in another memory location my_item = nullptr; } else { ++my_item; } } return *this; } vector_iterator operator++(int) { vector_iterator result = *this; ++(*this); return result; } vector_iterator& operator--() { __TBB_ASSERT(my_index > 0, "operator--() applied to iterator already at beginning of concurrent_vector"); --my_index; if (my_item != nullptr) { if (vector_type::is_first_element_in_segment(my_index)) { // If the iterator crosses a segment boundary, the pointer become invalid // as possibly next segment is in another memory location my_item = nullptr; } else { --my_item; } } return *this; } vector_iterator operator--(int) { vector_iterator result = *this; 
--(*this); return result; } private: // concurrent_vector over which we are iterating. vector_type* my_vector; // Index into the vector size_type my_index; // Caches my_vector *it; // If my_item == nullptr cached value is not available use internal_subscript(my_index) mutable value_type* my_item; }; // class vector_iterator template vector_iterator operator+( typename vector_iterator::difference_type offset, const vector_iterator& v ) { return vector_iterator(*v.my_vector, v.my_index + offset); } template typename vector_iterator::difference_type operator-( const vector_iterator& i, const vector_iterator& j ) { using difference_type = typename vector_iterator::difference_type; return static_cast(i.my_index) - static_cast(j.my_index); } template bool operator==( const vector_iterator& i, const vector_iterator& j ) { return i.my_vector == j.my_vector && i.my_index == j.my_index; } template bool operator!=( const vector_iterator& i, const vector_iterator& j ) { return !(i == j); } template bool operator<( const vector_iterator& i, const vector_iterator& j ) { return i.my_index < j.my_index; } template bool operator>( const vector_iterator& i, const vector_iterator& j ) { return j < i; } template bool operator>=( const vector_iterator& i, const vector_iterator& j ) { return !(i < j); } template bool operator<=( const vector_iterator& i, const vector_iterator& j ) { return !(j < i); } static constexpr std::size_t embedded_table_num_segments = 3; template > class concurrent_vector : private segment_table, embedded_table_num_segments> { using self_type = concurrent_vector; using base_type = segment_table; friend class segment_table; template class generic_range_type : public tbb::blocked_range { using base_type = tbb::blocked_range; public: using value_type = T; using reference = T&; using const_reference = const T&; using iterator = Iterator; using difference_type = std::ptrdiff_t; using base_type::base_type; template generic_range_type( const generic_range_type& r) : 
blocked_range(r.begin(), r.end(), r.grainsize()) {} generic_range_type( generic_range_type& r, split ) : blocked_range(r, split()) {} }; // class generic_range_type static_assert(std::is_same::value, "value_type of the container must be the same as its allocator's"); using allocator_traits_type = tbb::detail::allocator_traits; // Segment table for concurrent_vector can be extended static constexpr bool allow_table_extending = true; static constexpr bool is_noexcept_assignment = allocator_traits_type::propagate_on_container_move_assignment::value || allocator_traits_type::is_always_equal::value; static constexpr bool is_noexcept_swap = allocator_traits_type::propagate_on_container_swap::value || allocator_traits_type::is_always_equal::value; public: using value_type = T; using allocator_type = Allocator; using size_type = std::size_t; using difference_type = std::ptrdiff_t; using reference = value_type&; using const_reference = const value_type&; using pointer = typename allocator_traits_type::pointer; using const_pointer = typename allocator_traits_type::const_pointer; using iterator = vector_iterator; using const_iterator = vector_iterator; using reverse_iterator = std::reverse_iterator; using const_reverse_iterator = std::reverse_iterator; using range_type = generic_range_type; using const_range_type = generic_range_type; concurrent_vector() : concurrent_vector(allocator_type()) {} explicit concurrent_vector( const allocator_type& alloc ) noexcept : base_type(alloc) {} explicit concurrent_vector( size_type count, const value_type& value, const allocator_type& alloc = allocator_type() ) : concurrent_vector(alloc) { try_call( [&] { grow_by(count, value); } ).on_exception( [&] { base_type::clear(); }); } explicit concurrent_vector( size_type count, const allocator_type& alloc = allocator_type() ) : concurrent_vector(alloc) { try_call( [&] { grow_by(count); } ).on_exception( [&] { base_type::clear(); }); } template concurrent_vector( InputIterator first, 
InputIterator last, const allocator_type& alloc = allocator_type() ) : concurrent_vector(alloc) { try_call( [&] { grow_by(first, last); } ).on_exception( [&] { base_type::clear(); }); } concurrent_vector( const concurrent_vector& other ) : base_type(segment_table_allocator_traits::select_on_container_copy_construction(other.get_allocator())) { try_call( [&] { grow_by(other.begin(), other.end()); } ).on_exception( [&] { base_type::clear(); }); } concurrent_vector( const concurrent_vector& other, const allocator_type& alloc ) : base_type(other, alloc) {} concurrent_vector(concurrent_vector&& other) noexcept : base_type(std::move(other)) {} concurrent_vector( concurrent_vector&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} concurrent_vector( std::initializer_list init, const allocator_type& alloc = allocator_type() ) : concurrent_vector(init.begin(), init.end(), alloc) {} ~concurrent_vector() {} // Assignment concurrent_vector& operator=( const concurrent_vector& other ) { base_type::operator=(other); return *this; } concurrent_vector& operator=( concurrent_vector&& other ) noexcept(is_noexcept_assignment) { base_type::operator=(std::move(other)); return *this; } concurrent_vector& operator=( std::initializer_list init ) { assign(init); return *this; } void assign( size_type count, const value_type& value ) { destroy_elements(); grow_by(count, value); } template typename std::enable_if::value, void>::type assign( InputIterator first, InputIterator last ) { destroy_elements(); grow_by(first, last); } void assign( std::initializer_list init ) { destroy_elements(); assign(init.begin(), init.end()); } // Concurrent growth iterator grow_by( size_type delta ) { return internal_grow_by_delta(delta); } iterator grow_by( size_type delta, const value_type& value ) { return internal_grow_by_delta(delta, value); } template typename std::enable_if::value, iterator>::type grow_by( ForwardIterator first, ForwardIterator last ) { auto delta = 
std::distance(first, last); return internal_grow_by_delta(delta, first, last); } iterator grow_by( std::initializer_list init ) { return grow_by(init.begin(), init.end()); } iterator grow_to_at_least( size_type n ) { return internal_grow_to_at_least(n); } iterator grow_to_at_least( size_type n, const value_type& value ) { return internal_grow_to_at_least(n, value); } iterator push_back( const value_type& item ) { return internal_emplace_back(item); } iterator push_back( value_type&& item ) { return internal_emplace_back(std::move(item)); } template iterator emplace_back( Args&&... args ) { return internal_emplace_back(std::forward(args)...); } // Items access reference operator[]( size_type index ) { return internal_subscript(index); } const_reference operator[]( size_type index ) const { return internal_subscript(index); } reference at( size_type index ) { return internal_subscript_with_exceptions(index); } const_reference at( size_type index ) const { return internal_subscript_with_exceptions(index); } // Get range for iterating with parallel algorithms range_type range( size_t grainsize = 1 ) { return range_type(begin(), end(), grainsize); } // Get const range for iterating with parallel algorithms const_range_type range( size_t grainsize = 1 ) const { return const_range_type(begin(), end(), grainsize); } reference front() { return internal_subscript(0); } const_reference front() const { return internal_subscript(0); } reference back() { return internal_subscript(size() - 1); } const_reference back() const { return internal_subscript(size() - 1); } // Iterators iterator begin() { return iterator(*this, 0); } const_iterator begin() const { return const_iterator(*this, 0); } const_iterator cbegin() const { return const_iterator(*this, 0); } iterator end() { return iterator(*this, size()); } const_iterator end() const { return const_iterator(*this, size()); } const_iterator cend() const { return const_iterator(*this, size()); } reverse_iterator rbegin() { return 
reverse_iterator(end()); } const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } const_reverse_iterator crbegin() const { return const_reverse_iterator(cend()); } reverse_iterator rend() { return reverse_iterator(begin()); } const_reverse_iterator rend() const { return const_reverse_iterator(begin()); } const_reverse_iterator crend() const { return const_reverse_iterator(cbegin()); } allocator_type get_allocator() const { return base_type::get_allocator(); } // Storage bool empty() const noexcept { return 0 == size(); } size_type size() const noexcept { return std::min(this->my_size.load(std::memory_order_acquire), capacity()); } size_type max_size() const noexcept { return allocator_traits_type::max_size(base_type::get_allocator()); } size_type capacity() const noexcept { return base_type::capacity(); } void reserve( size_type n ) { if (n == 0) return; if (n > max_size()) { tbb::detail::throw_exception(exception_id::reservation_length_error); } this->assign_first_block_if_necessary(this->segment_index_of(n - 1) + 1); base_type::reserve(n); } void resize( size_type n ) { internal_resize(n); } void resize( size_type n, const value_type& val ) { internal_resize(n, val); } void shrink_to_fit() { internal_compact(); } void swap(concurrent_vector& other) noexcept(is_noexcept_swap) { base_type::swap(other); } void clear() { destroy_elements(); } private: using segment_type = typename base_type::segment_type; using segment_table_type = typename base_type::segment_table_type; using segment_table_allocator_traits = typename base_type::segment_table_allocator_traits; using segment_index_type = typename base_type::segment_index_type; using segment_element_type = typename base_type::value_type; using segment_element_allocator_type = typename allocator_traits_type::template rebind_alloc; using segment_element_allocator_traits = tbb::detail::allocator_traits; segment_table_type allocate_long_table( const typename base_type::atomic_segment* 
embedded_table, size_type start_index ) { __TBB_ASSERT(start_index <= this->embedded_table_size, "Start index out of embedded table"); // If other threads are trying to set pointers in the short segment, wait for them to finish their // assignments before we copy the short segment to the long segment. Note: grow_to_at_least depends on it for (segment_index_type i = 0; this->segment_base(i) < start_index; ++i) { spin_wait_while_eq(embedded_table[i], segment_type(nullptr)); } // It is possible that the table was extend by a thread allocating first_block, need to check this. if (this->get_table() != embedded_table) { return nullptr; } // Allocate long segment table and fill with null pointers segment_table_type new_segment_table = segment_table_allocator_traits::allocate(base_type::get_allocator(), this->pointers_per_long_table); // Copy segment pointers from the embedded table for (size_type segment_index = 0; segment_index < this->pointers_per_embedded_table; ++segment_index) { segment_table_allocator_traits::construct(base_type::get_allocator(), &new_segment_table[segment_index], embedded_table[segment_index].load(std::memory_order_relaxed)); } for (size_type segment_index = this->pointers_per_embedded_table; segment_index < this->pointers_per_long_table; ++segment_index) { segment_table_allocator_traits::construct(base_type::get_allocator(), &new_segment_table[segment_index], nullptr); } return new_segment_table; } // create_segment function is required by the segment_table base class segment_type create_segment( segment_table_type table, segment_index_type seg_index, size_type index ) { size_type first_block = this->my_first_block.load(std::memory_order_relaxed); // First block allocation if (seg_index < first_block) { // If 0 segment is already allocated, then it remains to wait until the segments are filled to requested if (table[0].load(std::memory_order_acquire) != nullptr) { spin_wait_while_eq(table[seg_index], segment_type(nullptr)); return nullptr; } 
segment_element_allocator_type segment_allocator(base_type::get_allocator()); segment_type new_segment = nullptr; size_type first_block_size = this->segment_size(first_block); try_call( [&] { new_segment = segment_element_allocator_traits::allocate(segment_allocator, first_block_size); } ).on_exception( [&] { segment_type disabled_segment = nullptr; if (table[0].compare_exchange_strong(disabled_segment, this->segment_allocation_failure_tag)) { size_type end_segment = table == this->my_embedded_table ? this->pointers_per_embedded_table : first_block; for (size_type i = 1; i < end_segment; ++i) { table[i].store(this->segment_allocation_failure_tag, std::memory_order_release); } } }); segment_type disabled_segment = nullptr; if (table[0].compare_exchange_strong(disabled_segment, new_segment)) { this->extend_table_if_necessary(table, 0, first_block_size); for (size_type i = 1; i < first_block; ++i) { table[i].store(new_segment, std::memory_order_release); } // Other threads can wait on a snapshot of an embedded table, need to fill it. 
for (size_type i = 1; i < first_block && i < this->pointers_per_embedded_table; ++i) { this->my_embedded_table[i].store(new_segment, std::memory_order_release); } } else if (new_segment != this->segment_allocation_failure_tag) { // Deallocate the memory segment_element_allocator_traits::deallocate(segment_allocator, new_segment, first_block_size); // 0 segment is already allocated, then it remains to wait until the segments are filled to requested spin_wait_while_eq(table[seg_index], segment_type(nullptr)); } } else { size_type offset = this->segment_base(seg_index); if (index == offset) { __TBB_ASSERT(table[seg_index].load(std::memory_order_relaxed) == nullptr, "Only this thread can enable this segment"); segment_element_allocator_type segment_allocator(base_type::get_allocator()); segment_type new_segment = this->segment_allocation_failure_tag; try_call( [&] { new_segment = segment_element_allocator_traits::allocate(segment_allocator,this->segment_size(seg_index)); // Shift base address to simplify access by index new_segment -= this->segment_base(seg_index); } ).on_completion( [&] { table[seg_index].store(new_segment, std::memory_order_release); }); } else { spin_wait_while_eq(table[seg_index], segment_type(nullptr)); } } return nullptr; } // Returns the number of elements in the segment to be destroy size_type number_of_elements_in_segment( segment_index_type seg_index ) { size_type curr_vector_size = this->my_size.load(std::memory_order_relaxed); size_type curr_segment_base = this->segment_base(seg_index); if (seg_index == 0) { return std::min(curr_vector_size, this->segment_size(seg_index)); } else { // Perhaps the segment is allocated, but there are no elements in it. if (curr_vector_size < curr_segment_base) { return 0; } return curr_segment_base * 2 > curr_vector_size ? 
curr_vector_size - curr_segment_base : curr_segment_base; } } segment_type nullify_segment( segment_table_type table, size_type segment_index ) { segment_type target_segment = table[segment_index].load(std::memory_order_relaxed); if (segment_index >= this->my_first_block) { table[segment_index].store(nullptr, std::memory_order_relaxed); } else { if (segment_index == 0) { for (size_type i = 0; i < this->my_first_block; ++i) { table[i].store(nullptr, std::memory_order_relaxed); } } } return target_segment; } void deallocate_segment( segment_type address, segment_index_type seg_index ) { segment_element_allocator_type segment_allocator(base_type::get_allocator()); size_type first_block = this->my_first_block.load(std::memory_order_relaxed); if (seg_index >= first_block) { segment_element_allocator_traits::deallocate(segment_allocator, address, this->segment_size(seg_index)); } else if (seg_index == 0) { size_type elements_to_deallocate = first_block > 0 ? this->segment_size(first_block) : this->segment_size(0); segment_element_allocator_traits::deallocate(segment_allocator, address, elements_to_deallocate); } } // destroy_segment function is required by the segment_table base class void destroy_segment( segment_type address, segment_index_type seg_index ) { size_type elements_to_destroy = number_of_elements_in_segment(seg_index); segment_element_allocator_type segment_allocator(base_type::get_allocator()); for (size_type i = 0; i < elements_to_destroy; ++i) { segment_element_allocator_traits::destroy(segment_allocator, address + i); } deallocate_segment(address, seg_index); } // copy_segment function is required by the segment_table base class void copy_segment( segment_index_type seg_index, segment_type from, segment_type to ) { size_type i = 0; try_call( [&] { for (; i != number_of_elements_in_segment(seg_index); ++i) { segment_table_allocator_traits::construct(base_type::get_allocator(), to + i, from[i]); } } ).on_exception( [&] { // Zero-initialize items left not 
constructed after the exception zero_unconstructed_elements(this->get_segment(seg_index) + i, this->segment_size(seg_index) - i); segment_index_type last_segment = this->segment_index_of(this->my_size.load(std::memory_order_relaxed)); auto table = this->get_table(); for (segment_index_type j = seg_index + 1; j != last_segment; ++j) { auto curr_segment = table[j].load(std::memory_order_relaxed); if (curr_segment) { zero_unconstructed_elements(curr_segment + this->segment_base(j), this->segment_size(j)); } } this->my_size.store(this->segment_size(seg_index) + i, std::memory_order_relaxed); }); } // move_segment function is required by the segment_table base class void move_segment( segment_index_type seg_index, segment_type from, segment_type to ) { size_type i = 0; try_call( [&] { for (; i != number_of_elements_in_segment(seg_index); ++i) { segment_table_allocator_traits::construct(base_type::get_allocator(), to + i, std::move(from[i])); } } ).on_exception( [&] { // Zero-initialize items left not constructed after the exception zero_unconstructed_elements(this->get_segment(seg_index) + i, this->segment_size(seg_index) - i); segment_index_type last_segment = this->segment_index_of(this->my_size.load(std::memory_order_relaxed)); auto table = this->get_table(); for (segment_index_type j = seg_index + 1; j != last_segment; ++j) { auto curr_segment = table[j].load(std::memory_order_relaxed); if (curr_segment) { zero_unconstructed_elements(curr_segment + this->segment_base(j), this->segment_size(j)); } } this->my_size.store(this->segment_size(seg_index) + i, std::memory_order_relaxed); }); } static constexpr bool is_first_element_in_segment( size_type index ) { // An element is the first in a segment if its index is equal to a power of two return is_power_of_two_at_least(index, 2); } const_reference internal_subscript( size_type index ) const { return const_cast(this)->internal_subscript(index); } reference internal_subscript( size_type index ) { __TBB_ASSERT(index < 
this->my_size.load(std::memory_order_relaxed), "Invalid subscript index"); return base_type::template internal_subscript(index); } const_reference internal_subscript_with_exceptions( size_type index ) const { return const_cast(this)->internal_subscript_with_exceptions(index); } reference internal_subscript_with_exceptions( size_type index ) { if (index >= this->my_size.load(std::memory_order_acquire)) { tbb::detail::throw_exception(exception_id::out_of_range); } segment_table_type table = this->my_segment_table.load(std::memory_order_acquire); size_type seg_index = this->segment_index_of(index); if (base_type::number_of_segments(table) < seg_index) { tbb::detail::throw_exception(exception_id::out_of_range); } if (table[seg_index] <= this->segment_allocation_failure_tag) { tbb::detail::throw_exception(exception_id::out_of_range); } return base_type::template internal_subscript(index); } static void zero_unconstructed_elements( pointer start, size_type count ) { std::memset(static_cast(start), 0, count * sizeof(value_type)); } template iterator internal_emplace_back( Args&&... args ) { size_type old_size = this->my_size++; this->assign_first_block_if_necessary(default_first_block_size); auto element_address = &base_type::template internal_subscript(old_size); // try_call API is not convenient here due to broken // variadic capture on GCC 4.8.5 auto value_guard = make_raii_guard([&] { zero_unconstructed_elements(element_address, /*count =*/1); }); segment_table_allocator_traits::construct(base_type::get_allocator(), element_address, std::forward(args)...); value_guard.dismiss(); return iterator(*this, old_size, element_address); } template void internal_loop_construct( segment_table_type table, size_type start_idx, size_type end_idx, const Args&... 
args ) { static_assert(sizeof...(Args) < 2, "Too many parameters"); for (size_type idx = start_idx; idx < end_idx; ++idx) { auto element_address = &base_type::template internal_subscript(idx); // try_call API is not convenient here due to broken // variadic capture on GCC 4.8.5 auto value_guard = make_raii_guard( [&] { segment_index_type last_allocated_segment = this->find_last_allocated_segment(table); size_type segment_size = this->segment_size(last_allocated_segment); end_idx = end_idx < segment_size ? end_idx : segment_size; for (size_type i = idx; i < end_idx; ++i) { zero_unconstructed_elements(&this->internal_subscript(i), /*count =*/1); } }); segment_table_allocator_traits::construct(base_type::get_allocator(), element_address, args...); value_guard.dismiss(); } } template void internal_loop_construct( segment_table_type table, size_type start_idx, size_type end_idx, ForwardIterator first, ForwardIterator ) { for (size_type idx = start_idx; idx < end_idx; ++idx) { auto element_address = &base_type::template internal_subscript(idx); try_call( [&] { segment_table_allocator_traits::construct(base_type::get_allocator(), element_address, *first++); } ).on_exception( [&] { segment_index_type last_allocated_segment = this->find_last_allocated_segment(table); size_type segment_size = this->segment_size(last_allocated_segment); end_idx = end_idx < segment_size ? end_idx : segment_size; for (size_type i = idx; i < end_idx; ++i) { zero_unconstructed_elements(&this->internal_subscript(i), /*count =*/1); } }); } } template iterator internal_grow( size_type start_idx, size_type end_idx, const Args&... 
args ) { this->assign_first_block_if_necessary(this->segment_index_of(end_idx - 1) + 1); size_type seg_index = this->segment_index_of(end_idx - 1); segment_table_type table = this->get_table(); this->extend_table_if_necessary(table, start_idx, end_idx); if (seg_index > this->my_first_block.load(std::memory_order_relaxed)) { // So that other threads be able to work with the last segment of grow_by, allocate it immediately. // If the last segment is not less than the first block if (table[seg_index].load(std::memory_order_relaxed) == nullptr) { size_type first_element = this->segment_base(seg_index); if (first_element >= start_idx && first_element < end_idx) { segment_type segment = table[seg_index].load(std::memory_order_relaxed); base_type::enable_segment(segment, table, seg_index, first_element); } } } internal_loop_construct(table, start_idx, end_idx, args...); return iterator(*this, start_idx, &base_type::template internal_subscript(start_idx)); } template iterator internal_grow_by_delta( size_type delta, const Args&... args ) { if (delta == size_type(0)) { return end(); } size_type start_idx = this->my_size.fetch_add(delta); size_type end_idx = start_idx + delta; return internal_grow(start_idx, end_idx, args...); } template iterator internal_grow_to_at_least( size_type new_size, const Args&... 
args ) { size_type old_size = this->my_size.load(std::memory_order_relaxed); if (new_size == size_type(0)) return iterator(*this, 0); while (old_size < new_size && !this->my_size.compare_exchange_weak(old_size, new_size)) {} int delta = static_cast(new_size) - static_cast(old_size); if (delta > 0) { return internal_grow(old_size, new_size, args...); } size_type end_segment = this->segment_index_of(new_size - 1); // Check/wait for segments allocation completes if (end_segment >= this->pointers_per_embedded_table && this->get_table() == this->my_embedded_table) { spin_wait_while_eq(this->my_segment_table, this->my_embedded_table); } for (segment_index_type seg_idx = 0; seg_idx <= end_segment; ++seg_idx) { if (this->get_table()[seg_idx].load(std::memory_order_relaxed) == nullptr) { atomic_backoff backoff(true); while (this->get_table()[seg_idx].load(std::memory_order_relaxed) == nullptr) { backoff.pause(); } } } #if TBB_USE_DEBUG size_type cap = capacity(); __TBB_ASSERT( cap >= new_size, NULL); #endif return iterator(*this, size()); } template void internal_resize( size_type n, const Args&... 
args ) { if (n == 0) { clear(); return; } size_type old_size = this->my_size.load(std::memory_order_acquire); if (n > old_size) { reserve(n); grow_to_at_least(n, args...); } else { if (old_size == n) { return; } size_type last_segment = this->segment_index_of(old_size - 1); // Delete segments for (size_type seg_idx = this->segment_index_of(n - 1) + 1; seg_idx <= last_segment; ++seg_idx) { this->delete_segment(seg_idx); } // If n > segment_size(n) => we need to destroy all of the items in the first segment // Otherwise, we need to destroy only items with the index < n size_type n_segment = this->segment_index_of(n - 1); size_type last_index_to_destroy = std::min(this->segment_base(n_segment) + this->segment_size(n_segment), old_size); // Destroy elements in curr segment for (size_type idx = n; idx < last_index_to_destroy; ++idx) { segment_table_allocator_traits::destroy(base_type::get_allocator(), &base_type::template internal_subscript(idx)); } this->my_size.store(n, std::memory_order_release); } } void destroy_elements() { allocator_type alloc(base_type::get_allocator()); for (size_type i = 0; i < this->my_size.load(std::memory_order_relaxed); ++i) { allocator_traits_type::destroy(alloc, &base_type::template internal_subscript(i)); } this->my_size.store(0, std::memory_order_relaxed); } static bool incompact_predicate( size_type size ) { // memory page size const size_type page_size = 4096; return size < page_size || ((size - 1) % page_size < page_size / 2 && size < page_size * 128); } void internal_compact() { const size_type curr_size = this->my_size.load(std::memory_order_relaxed); segment_table_type table = this->get_table(); const segment_index_type k_end = this->find_last_allocated_segment(table); // allocated segments const segment_index_type k_stop = curr_size ? this->segment_index_of(curr_size - 1) + 1 : 0; // number of segments to store existing items: 0=>0; 1,2=>1; 3,4=>2; [5-8]=>3;.. 
const segment_index_type first_block = this->my_first_block; // number of merged segments, getting values from atomics segment_index_type k = first_block; if (k_stop < first_block) { k = k_stop; } else { while (k < k_stop && incompact_predicate(this->segment_size(k) * sizeof(value_type))) k++; } if (k_stop == k_end && k == first_block) { return; } // First segment optimization if (k != first_block && k) { size_type max_block = std::max(first_block, k); auto buffer_table = segment_table_allocator_traits::allocate(base_type::get_allocator(), max_block); for (size_type seg_idx = 0; seg_idx < max_block; ++seg_idx) { segment_table_allocator_traits::construct(base_type::get_allocator(), &buffer_table[seg_idx], table[seg_idx].load(std::memory_order_relaxed)); table[seg_idx].store(nullptr, std::memory_order_relaxed); } this->my_first_block.store(k, std::memory_order_relaxed); size_type index = 0; try_call( [&] { for (; index < std::min(this->segment_size(max_block), curr_size); ++index) { auto element_address = &static_cast(this)->operator[](index); segment_index_type seg_idx = this->segment_index_of(index); segment_table_allocator_traits::construct(base_type::get_allocator(), element_address, std::move_if_noexcept(buffer_table[seg_idx].load(std::memory_order_relaxed)[index])); } } ).on_exception( [&] { segment_element_allocator_type allocator(base_type::get_allocator()); for (size_type i = 0; i < index; ++i) { auto element_adress = &this->operator[](i); segment_element_allocator_traits::destroy(allocator, element_adress); } segment_element_allocator_traits::deallocate(allocator, table[0].load(std::memory_order_relaxed), this->segment_size(max_block)); for (size_type seg_idx = 0; seg_idx < max_block; ++seg_idx) { table[seg_idx].store(buffer_table[seg_idx].load(std::memory_order_relaxed), std::memory_order_relaxed); buffer_table[seg_idx].store(nullptr, std::memory_order_relaxed); } segment_table_allocator_traits::deallocate(base_type::get_allocator(), buffer_table, 
max_block); this->my_first_block.store(first_block, std::memory_order_relaxed); }); // Need to correct deallocate old segments // Method destroy_segment respect active first_block, therefore, // in order for the segment deletion to work correctly, set the first_block size that was earlier, // destroy the unnecessary segments. this->my_first_block.store(first_block, std::memory_order_relaxed); for (size_type seg_idx = max_block; seg_idx > 0 ; --seg_idx) { auto curr_segment = buffer_table[seg_idx - 1].load(std::memory_order_relaxed); if (curr_segment != nullptr) { destroy_segment(buffer_table[seg_idx - 1].load(std::memory_order_relaxed) + this->segment_base(seg_idx - 1), seg_idx - 1); } } this->my_first_block.store(k, std::memory_order_relaxed); for (size_type seg_idx = 0; seg_idx < max_block; ++seg_idx) { segment_table_allocator_traits::destroy(base_type::get_allocator(), &buffer_table[seg_idx]); } segment_table_allocator_traits::deallocate(base_type::get_allocator(), buffer_table, max_block); } // free unnecessary segments allocated by reserve() call if (k_stop < k_end) { for (size_type seg_idx = k_end; seg_idx != k_stop; --seg_idx) { if (table[seg_idx - 1].load(std::memory_order_relaxed) != nullptr) { this->delete_segment(seg_idx - 1); } } if (!k) this->my_first_block.store(0, std::memory_order_relaxed);; } } // Lever for adjusting the size of first_block at the very first insertion. 
// TODO: consider >1 value, check performance static constexpr size_type default_first_block_size = 1; template friend class vector_iterator; }; // class concurrent_vector #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT // Deduction guide for the constructor from two iterators template >, typename = std::enable_if_t>, typename = std::enable_if_t>> concurrent_vector( It, It, Alloc = Alloc() ) -> concurrent_vector, Alloc>; #endif template void swap(concurrent_vector &lhs, concurrent_vector &rhs) { lhs.swap(rhs); } template bool operator==(const concurrent_vector &lhs, const concurrent_vector &rhs) { return lhs.size() == rhs.size() && std::equal(lhs.begin(), lhs.end(), rhs.begin()); } #if !__TBB_CPP20_COMPARISONS_PRESENT template bool operator!=(const concurrent_vector &lhs, const concurrent_vector &rhs) { return !(lhs == rhs); } #endif // !__TBB_CPP20_COMPARISONS_PRESENT #if __TBB_CPP20_COMPARISONS_PRESENT && __TBB_CPP20_CONCEPTS_PRESENT template tbb::detail::synthesized_three_way_result::value_type> operator<=>(const concurrent_vector &lhs, const concurrent_vector &rhs) { return std::lexicographical_compare_three_way(lhs.begin(), lhs.end(), rhs.begin(), rhs.end(), tbb::detail::synthesized_three_way_comparator{}); } #else template bool operator<(const concurrent_vector &lhs, const concurrent_vector &rhs) { return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); } template bool operator<=(const concurrent_vector &lhs, const concurrent_vector &rhs) { return !(rhs < lhs); } template bool operator>(const concurrent_vector &lhs, const concurrent_vector &rhs) { return rhs < lhs; } template bool operator>=(const concurrent_vector &lhs, const concurrent_vector &rhs) { return !(lhs < rhs); } #endif // __TBB_CPP20_COMPARISONS_PRESENT && __TBB_CPP20_CONCEPTS_PRESENT } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::concurrent_vector; } // namespace v1 } // namespace tbb #endif // __TBB_concurrent_vector_H 
level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/000077500000000000000000000000001514453371700300425ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/_aggregator.h000066400000000000000000000166121514453371700325020ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_detail__aggregator_H #define __TBB_detail__aggregator_H #include "_assert.h" #include "_utils.h" #include #if !__TBBMALLOC_BUILD // TODO: check this macro with TBB Malloc #include "../profiling.h" #endif namespace tbb { namespace detail { namespace d1 { // Base class for aggregated operation template class aggregated_operation { public: // Zero value means "wait" status, all other values are "user" specified values and // are defined into the scope of a class which uses "status" std::atomic status; std::atomic next; aggregated_operation() : status{}, next(nullptr) {} }; // class aggregated_operation // Aggregator base class /* An aggregator for collecting operations coming from multiple sources and executing them serially on a single thread. OperationType must be derived from aggregated_operation. The parameter HandlerType is a functor that will be passed the list of operations and is expected to handle each operation appropriately, setting the status of each operation to non-zero. 
*/ template class aggregator_generic { public: aggregator_generic() : pending_operations(nullptr), handler_busy(false) {} // Execute an operation /* Places an operation into the waitlist (pending_operations), and either handles the list, or waits for the operation to complete, or returns. The long_life_time parameter specifies the life time of the given operation object. Operations with long_life_time == true may be accessed after execution. A "short" life time operation (long_life_time == false) can be destroyed during execution, and so any access to it after it was put into the waitlist, including status check, is invalid. As a consequence, waiting for completion of such operation causes undefined behavior. */ template void execute( OperationType* op, HandlerType& handle_operations, bool long_life_time = true ) { // op->status should be read before inserting the operation into the // aggregator waitlist since it can become invalid after executing a // handler (if the operation has 'short' life time.) const uintptr_t status = op->status.load(std::memory_order_relaxed); // ITT note: &(op->status) tag is used to cover accesses to this op node. This // thread has created the operation, and now releases it so that the handler // thread may handle the associated operation w/o triggering a race condition; // thus this tag will be acquired just before the operation is handled in the // handle_operations functor. call_itt_notify(releasing, &(op->status)); // insert the operation in the queue. OperationType* res = pending_operations.load(std::memory_order_relaxed); do { op->next.store(res, std::memory_order_relaxed); } while (!pending_operations.compare_exchange_strong(res, op)); if (!res) { // first in the list; handle the operations // ITT note: &pending_operations tag covers access to the handler_busy flag, // which this waiting handler thread will try to set before entering // handle_operations. 
call_itt_notify(acquired, &pending_operations); start_handle_operations(handle_operations); // The operation with 'short' life time can already be destroyed if (long_life_time) __TBB_ASSERT(op->status.load(std::memory_order_relaxed), NULL); } // Not first; wait for op to be ready else if (!status) { // operation is blocking here. __TBB_ASSERT(long_life_time, "Waiting for an operation object that might be destroyed during processing"); call_itt_notify(prepare, &(op->status)); spin_wait_while_eq(op->status, uintptr_t(0)); } } private: // Trigger the handling of operations when the handler is free template void start_handle_operations( HandlerType& handle_operations ) { OperationType* op_list; // ITT note: &handler_busy tag covers access to pending_operations as it is passed // between active and waiting handlers. Below, the waiting handler waits until // the active handler releases, and the waiting handler acquires &handler_busy as // it becomes the active_handler. The release point is at the end of this // function, when all operations in pending_operations have been handled by the // owner of this aggregator. call_itt_notify(prepare, &handler_busy); // get the handler_busy: // only one thread can possibly spin here at a time spin_wait_until_eq(handler_busy, uintptr_t(0)); call_itt_notify(acquired, &handler_busy); // acquire fence not necessary here due to causality rule and surrounding atomics handler_busy.store(1, std::memory_order_relaxed); // ITT note: &pending_operations tag covers access to the handler_busy flag // itself. Capturing the state of the pending_operations signifies that // handler_busy has been set and a new active handler will now process that list's // operations. 
call_itt_notify(releasing, &pending_operations); // grab pending_operations op_list = pending_operations.exchange(nullptr); // handle all the operations handle_operations(op_list); // release the handler handler_busy.store(0, std::memory_order_release); } // An atomically updated list (aka mailbox) of pending operations std::atomic pending_operations; // Controls threads access to handle_operations std::atomic handler_busy; }; // class aggregator_generic template class aggregator : public aggregator_generic { HandlerType handle_operations; public: aggregator() = default; void initialize_handler( HandlerType h ) { handle_operations = h; } void execute(OperationType* op) { aggregator_generic::execute(op, handle_operations); } }; // class aggregator // the most-compatible friend declaration (vs, gcc, icc) is // template friend class aggregating_functor; template class aggregating_functor { AggregatingClass* my_object; public: aggregating_functor() = default; aggregating_functor( AggregatingClass* object ) : my_object(object) { __TBB_ASSERT(my_object, nullptr); } void operator()( OperationList* op_list ) { my_object->handle_operations(op_list); } }; // class aggregating_functor } // namespace d1 } // namespace detail } // namespace tbb #endif // __TBB_detail__aggregator_H _aligned_space.h000066400000000000000000000025671514453371700330630ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_aligned_space_H #define __TBB_aligned_space_H #include #include "_template_helpers.h" namespace tbb { namespace detail { inline namespace d0 { //! Block of space aligned sufficiently to construct an array T with N elements. /** The elements are not constructed or destroyed by this class. @ingroup memory_allocation */ template class aligned_space { alignas(alignof(T)) std::uint8_t aligned_array[N * sizeof(T)]; public: //! Pointer to beginning of array T* begin() const { return punned_cast(&aligned_array); } //! Pointer to one past last element in array. T* end() const { return begin() + N; } }; } // namespace d0 } // namespace detail } // namespace tbb #endif /* __TBB_aligned_space_H */ _allocator_traits.h000066400000000000000000000073371514453371700336530ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #ifndef __TBB_detail__allocator_traits_H #define __TBB_detail__allocator_traits_H #include "_config.h" #include "_template_helpers.h" #include #include namespace tbb { namespace detail { inline namespace d0 { #if !__TBB_CPP17_ALLOCATOR_IS_ALWAYS_EQUAL_PRESENT // Struct is_always_equal_detector provides the member type "type" which is // Allocator::is_always_equal if it is present, std::false_type otherwise template struct is_always_equal_detector { using type = std::false_type; }; template struct is_always_equal_detector> { using type = typename Allocator::is_always_equal; }; #endif // !__TBB_CPP17_ALLOCATOR_IS_ALWAYS_EQUAL_PRESENT template class allocator_traits : public std::allocator_traits { using base_type = std::allocator_traits; public: #if !__TBB_CPP17_ALLOCATOR_IS_ALWAYS_EQUAL_PRESENT using is_always_equal = typename is_always_equal_detector::type; #endif template using rebind_traits = typename tbb::detail::allocator_traits>; }; // struct allocator_traits template void copy_assign_allocators_impl( Allocator& lhs, const Allocator& rhs, /*pocca = */std::true_type ) { lhs = rhs; } template void copy_assign_allocators_impl( Allocator&, const Allocator&, /*pocca = */ std::false_type ) {} // Copy assigns allocators only if propagate_on_container_copy_assignment is true template void copy_assign_allocators( Allocator& lhs, const Allocator& rhs ) { using pocca_type = typename allocator_traits::propagate_on_container_copy_assignment; copy_assign_allocators_impl(lhs, rhs, pocca_type()); } template void move_assign_allocators_impl( Allocator& lhs, Allocator& rhs, /*pocma = */ std::true_type ) { lhs = std::move(rhs); } template void move_assign_allocators_impl( Allocator&, Allocator&, /*pocma = */ std::false_type ) {} // Move assigns allocators only if propagate_on_container_move_assignment is true template void move_assign_allocators( Allocator& lhs, Allocator& rhs ) { using pocma_type = typename allocator_traits::propagate_on_container_move_assignment; 
move_assign_allocators_impl(lhs, rhs, pocma_type()); } template void swap_allocators_impl( Allocator& lhs, Allocator& rhs, /*pocs = */ std::true_type ) { using std::swap; swap(lhs, rhs); } template void swap_allocators_impl( Allocator&, Allocator&, /*pocs = */ std::false_type ) {} // Swaps allocators only if propagate_on_container_swap is true template void swap_allocators( Allocator& lhs, Allocator& rhs ) { using pocs_type = typename allocator_traits::propagate_on_container_swap; swap_allocators_impl(lhs, rhs, pocs_type()); } } // inline namespace d0 } // namespace detail } // namespace tbb #endif // __TBB_detail__allocator_traits_H level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/_assert.h000066400000000000000000000045301514453371700316550ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_detail__assert_H #define __TBB_detail__assert_H #include "_config.h" #if __TBBMALLOC_BUILD namespace rml { namespace internal { #else namespace tbb { namespace detail { namespace r1 { #endif //! Process an assertion failure. /** Normally called from __TBB_ASSERT macro. If assertion handler is null, print message for assertion failure and abort. Otherwise call the assertion handler. 
*/ TBB_EXPORT void __TBB_EXPORTED_FUNC assertion_failure(const char* location, int line, const char* expression, const char* comment); #if __TBBMALLOC_BUILD }} // namespaces rml::internal #else } // namespace r1 } // namespace detail } // namespace tbb #endif #if __TBBMALLOC_BUILD //! Release version of assertions #define __TBB_ASSERT_RELEASE(predicate,message) ((predicate)?((void)0) : rml::internal::assertion_failure(__func__,__LINE__,#predicate,message)) #else #define __TBB_ASSERT_RELEASE(predicate,message) ((predicate)?((void)0) : tbb::detail::r1::assertion_failure(__func__,__LINE__,#predicate,message)) #endif #if TBB_USE_ASSERT //! Assert that predicate is true. /** If predicate is false, print assertion failure message. If the comment argument is not NULL, it is printed as part of the failure message. The comment argument has no other effect. */ #define __TBB_ASSERT(predicate,message) __TBB_ASSERT_RELEASE(predicate,message) //! "Extended" version #define __TBB_ASSERT_EX __TBB_ASSERT #else //! No-op version of __TBB_ASSERT. #define __TBB_ASSERT(predicate,comment) ((void)0) //! "Extended" version is useful to suppress warnings if a variable is only used with an assert #define __TBB_ASSERT_EX(predicate,comment) ((void)(1 && (predicate))) #endif // TBB_USE_ASSERT #endif // __TBB_detail__assert_H level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/_attach.h000066400000000000000000000015351514453371700316220ustar00rootroot00000000000000/* Copyright (c) 2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_detail__attach_H #define __TBB_detail__attach_H #include "_config.h" namespace tbb { namespace detail { namespace d1 { struct attach {}; } // namespace d1 } // namespace detail } // namespace tbb #endif // __TBB_detail__attach_H _concurrent_queue_base.h000066400000000000000000000633701514453371700346640ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_detail__concurrent_queue_base_H #define __TBB_detail__concurrent_queue_base_H #include "_utils.h" #include "_exception.h" #include "_machine.h" #include "_allocator_traits.h" #include "../profiling.h" #include "../spin_mutex.h" #include "../cache_aligned_allocator.h" #include namespace tbb { namespace detail { namespace d2 { using ticket_type = std::size_t; template inline bool is_valid_page(const Page p) { return reinterpret_cast(p) > 1; } template struct concurrent_queue_rep; template class micro_queue_pop_finalizer; #if _MSC_VER && !defined(__INTEL_COMPILER) // unary minus operator applied to unsigned type, result still unsigned #pragma warning( push ) #pragma warning( disable: 4146 ) #endif // A queue using simple locking. // For efficiency, this class has no constructor. // The caller is expected to zero-initialize it. 
template class micro_queue { private: using queue_rep_type = concurrent_queue_rep; using self_type = micro_queue; public: using size_type = std::size_t; using value_type = T; using reference = value_type&; using const_reference = const value_type&; using allocator_type = Allocator; using allocator_traits_type = tbb::detail::allocator_traits; using queue_allocator_type = typename allocator_traits_type::template rebind_alloc; static constexpr size_type item_size = sizeof(T); static constexpr size_type items_per_page = item_size <= 8 ? 32 : item_size <= 16 ? 16 : item_size <= 32 ? 8 : item_size <= 64 ? 4 : item_size <= 128 ? 2 : 1; struct padded_page { padded_page() {} ~padded_page() {} reference operator[] (std::size_t index) { __TBB_ASSERT(index < items_per_page, "Index out of range"); return items[index]; } const_reference operator[] (std::size_t index) const { __TBB_ASSERT(index < items_per_page, "Index out of range"); return items[index]; } padded_page* next{ nullptr }; std::atomic mask{}; union { value_type items[items_per_page]; }; }; // struct padded_page using page_allocator_type = typename allocator_traits_type::template rebind_alloc; protected: using page_allocator_traits = tbb::detail::allocator_traits; public: using item_constructor_type = void (*)(value_type* location, const void* src); micro_queue() = default; micro_queue( const micro_queue& ) = delete; micro_queue& operator=( const micro_queue& ) = delete; size_type prepare_page( ticket_type k, queue_rep_type& base, page_allocator_type page_allocator, padded_page*& p ) { __TBB_ASSERT(p == nullptr, "Invalid page argument for prepare_page"); k &= -queue_rep_type::n_queue; size_type index = modulo_power_of_two(k / queue_rep_type::n_queue, items_per_page); if (!index) { try_call( [&] { p = page_allocator_traits::allocate(page_allocator, 1); }).on_exception( [&] { ++base.n_invalid_entries; invalidate_page( k ); }); page_allocator_traits::construct(page_allocator, p); } if 
(tail_counter.load(std::memory_order_relaxed) != k) spin_wait_until_my_turn(tail_counter, k, base); d1::call_itt_notify(d1::acquired, &tail_counter); if (p) { spin_mutex::scoped_lock lock( page_mutex ); padded_page* q = tail_page.load(std::memory_order_relaxed); if (is_valid_page(q)) { q->next = p; } else { head_page.store(p, std::memory_order_relaxed); } tail_page.store(p, std::memory_order_release); } else { p = tail_page.load(std::memory_order_acquire); // TODO may be relaxed ? } return index; } template void push( ticket_type k, queue_rep_type& base, queue_allocator_type& allocator, Args&&... args ) { padded_page* p = nullptr; page_allocator_type page_allocator(allocator); size_type index = prepare_page(k, base, page_allocator, p); __TBB_ASSERT(p != nullptr, "Page was not prepared"); // try_call API is not convenient here due to broken // variadic capture on GCC 4.8.5 auto value_guard = make_raii_guard([&] { ++base.n_invalid_entries; d1::call_itt_notify(d1::releasing, &tail_counter); tail_counter.fetch_add(queue_rep_type::n_queue); }); page_allocator_traits::construct(page_allocator, &(*p)[index], std::forward(args)...); // If no exception was thrown, mark item as present. 
p->mask.store(p->mask.load(std::memory_order_relaxed) | uintptr_t(1) << index, std::memory_order_relaxed); d1::call_itt_notify(d1::releasing, &tail_counter); value_guard.dismiss(); tail_counter.fetch_add(queue_rep_type::n_queue); } void abort_push( ticket_type k, queue_rep_type& base, queue_allocator_type& allocator ) { padded_page* p = nullptr; prepare_page(k, base, allocator, p); ++base.n_invalid_entries; tail_counter.fetch_add(queue_rep_type::n_queue); } bool pop( void* dst, ticket_type k, queue_rep_type& base, queue_allocator_type& allocator) { k &= -queue_rep_type::n_queue; spin_wait_until_eq(head_counter, k); d1::call_itt_notify(d1::acquired, &head_counter); spin_wait_while_eq(tail_counter, k); d1::call_itt_notify(d1::acquired, &tail_counter); padded_page *p = head_page.load(std::memory_order_acquire); __TBB_ASSERT( p, nullptr ); size_type index = modulo_power_of_two( k/queue_rep_type::n_queue, items_per_page ); bool success = false; { page_allocator_type page_allocator(allocator); micro_queue_pop_finalizer finalizer(*this, page_allocator, k + queue_rep_type::n_queue, index == items_per_page - 1 ? 
p : nullptr ); if (p->mask.load(std::memory_order_relaxed) & (std::uintptr_t(1) << index)) { success = true; assign_and_destroy_item( dst, *p, index ); } else { --base.n_invalid_entries; } } return success; } micro_queue& assign( const micro_queue& src, queue_allocator_type& allocator, item_constructor_type construct_item ) { head_counter.store(src.head_counter.load(std::memory_order_relaxed), std::memory_order_relaxed); tail_counter.store(src.tail_counter.load(std::memory_order_relaxed), std::memory_order_relaxed); const padded_page* srcp = src.head_page.load(std::memory_order_relaxed); if( is_valid_page(srcp) ) { ticket_type g_index = head_counter.load(std::memory_order_relaxed); size_type n_items = (tail_counter.load(std::memory_order_relaxed) - head_counter.load(std::memory_order_relaxed)) / queue_rep_type::n_queue; size_type index = modulo_power_of_two(head_counter.load(std::memory_order_relaxed) / queue_rep_type::n_queue, items_per_page); size_type end_in_first_page = (index+n_items < items_per_page) ? 
(index + n_items) : items_per_page; try_call( [&] { head_page.store(make_copy(allocator, srcp, index, end_in_first_page, g_index, construct_item), std::memory_order_relaxed); }).on_exception( [&] { head_counter.store(0, std::memory_order_relaxed); tail_counter.store(0, std::memory_order_relaxed); }); padded_page* cur_page = head_page.load(std::memory_order_relaxed); try_call( [&] { if (srcp != src.tail_page.load(std::memory_order_relaxed)) { for (srcp = srcp->next; srcp != src.tail_page.load(std::memory_order_relaxed); srcp=srcp->next ) { cur_page->next = make_copy( allocator, srcp, 0, items_per_page, g_index, construct_item ); cur_page = cur_page->next; } __TBB_ASSERT(srcp == src.tail_page.load(std::memory_order_relaxed), nullptr ); size_type last_index = modulo_power_of_two(tail_counter.load(std::memory_order_relaxed) / queue_rep_type::n_queue, items_per_page); if( last_index==0 ) last_index = items_per_page; cur_page->next = make_copy( allocator, srcp, 0, last_index, g_index, construct_item ); cur_page = cur_page->next; } tail_page.store(cur_page, std::memory_order_relaxed); }).on_exception( [&] { padded_page* invalid_page = reinterpret_cast(std::uintptr_t(1)); tail_page.store(invalid_page, std::memory_order_relaxed); }); } else { head_page.store(nullptr, std::memory_order_relaxed); tail_page.store(nullptr, std::memory_order_relaxed); } return *this; } padded_page* make_copy( queue_allocator_type& allocator, const padded_page* src_page, size_type begin_in_page, size_type end_in_page, ticket_type& g_index, item_constructor_type construct_item ) { page_allocator_type page_allocator(allocator); padded_page* new_page = page_allocator_traits::allocate(page_allocator, 1); new_page->next = nullptr; new_page->mask.store(src_page->mask.load(std::memory_order_relaxed), std::memory_order_relaxed); for (; begin_in_page!=end_in_page; ++begin_in_page, ++g_index) { if (new_page->mask.load(std::memory_order_relaxed) & uintptr_t(1) << begin_in_page) { copy_item(*new_page, 
begin_in_page, *src_page, begin_in_page, construct_item); } } return new_page; } void invalidate_page( ticket_type k ) { // Append an invalid page at address 1 so that no more pushes are allowed. padded_page* invalid_page = reinterpret_cast(std::uintptr_t(1)); { spin_mutex::scoped_lock lock( page_mutex ); tail_counter.store(k + queue_rep_type::n_queue + 1, std::memory_order_relaxed); padded_page* q = tail_page.load(std::memory_order_relaxed); if (is_valid_page(q)) { q->next = invalid_page; } else { head_page.store(invalid_page, std::memory_order_relaxed); } tail_page.store(invalid_page, std::memory_order_relaxed); } } padded_page* get_tail_page() { return tail_page.load(std::memory_order_relaxed); } padded_page* get_head_page() { return head_page.load(std::memory_order_relaxed); } void set_tail_page( padded_page* pg ) { tail_page.store(pg, std::memory_order_relaxed); } void clear(queue_allocator_type& allocator ) { padded_page* curr_page = head_page.load(std::memory_order_relaxed); std::size_t index = head_counter.load(std::memory_order_relaxed); page_allocator_type page_allocator(allocator); while (curr_page) { for (; index != items_per_page - 1; ++index) { curr_page->operator[](index).~value_type(); } padded_page* next_page = curr_page->next; page_allocator_traits::destroy(page_allocator, curr_page); page_allocator_traits::deallocate(page_allocator, curr_page, 1); curr_page = next_page; } padded_page* invalid_page = reinterpret_cast(std::uintptr_t(1)); head_page.store(invalid_page, std::memory_order_relaxed); tail_page.store(invalid_page, std::memory_order_relaxed); } private: // template friend class micro_queue_pop_finalizer; // Class used to ensure exception-safety of method "pop" class destroyer { value_type& my_value; public: destroyer( reference value ) : my_value(value) {} destroyer( const destroyer& ) = delete; destroyer& operator=( const destroyer& ) = delete; ~destroyer() {my_value.~T();} }; // class destroyer void copy_item( padded_page& dst, size_type 
dindex, const padded_page& src, size_type sindex, item_constructor_type construct_item ) { auto& src_item = src[sindex]; construct_item( &dst[dindex], static_cast(&src_item) ); } void assign_and_destroy_item( void* dst, padded_page& src, size_type index ) { auto& from = src[index]; destroyer d(from); *static_cast(dst) = std::move(from); } void spin_wait_until_my_turn( std::atomic& counter, ticket_type k, queue_rep_type& rb ) const { for (atomic_backoff b(true);; b.pause()) { ticket_type c = counter; if (c == k) return; else if (c & 1) { ++rb.n_invalid_entries; throw_exception( exception_id::bad_last_alloc); } } } std::atomic head_page{}; std::atomic head_counter{}; std::atomic tail_page{}; std::atomic tail_counter{}; spin_mutex page_mutex{}; }; // class micro_queue #if _MSC_VER && !defined(__INTEL_COMPILER) #pragma warning( pop ) #endif // warning 4146 is back template class micro_queue_pop_finalizer { public: using padded_page = typename Container::padded_page; using allocator_type = Allocator; using allocator_traits_type = tbb::detail::allocator_traits; micro_queue_pop_finalizer( Container& queue, Allocator& alloc, ticket_type k, padded_page* p ) : my_ticket_type(k), my_queue(queue), my_page(p), allocator(alloc) {} micro_queue_pop_finalizer( const micro_queue_pop_finalizer& ) = delete; micro_queue_pop_finalizer& operator=( const micro_queue_pop_finalizer& ) = delete; ~micro_queue_pop_finalizer() { padded_page* p = my_page; if( is_valid_page(p) ) { spin_mutex::scoped_lock lock( my_queue.page_mutex ); padded_page* q = p->next; my_queue.head_page.store(q, std::memory_order_release); if( !is_valid_page(q) ) { my_queue.tail_page.store(nullptr, std::memory_order_release); } } my_queue.head_counter.store(my_ticket_type, std::memory_order_release); if ( is_valid_page(p) ) { allocator_traits_type::destroy(allocator, static_cast(p)); allocator_traits_type::deallocate(allocator, static_cast(p), 1); } } private: ticket_type my_ticket_type; Container& my_queue; padded_page* 
my_page; Allocator& allocator; }; // class micro_queue_pop_finalizer #if _MSC_VER && !defined(__INTEL_COMPILER) // structure was padded due to alignment specifier #pragma warning( push ) #pragma warning( disable: 4324 ) #endif template struct concurrent_queue_rep { using self_type = concurrent_queue_rep; using size_type = std::size_t; using micro_queue_type = micro_queue; using allocator_type = Allocator; using allocator_traits_type = tbb::detail::allocator_traits; using padded_page = typename micro_queue_type::padded_page; using page_allocator_type = typename micro_queue_type::page_allocator_type; using item_constructor_type = typename micro_queue_type::item_constructor_type; private: using page_allocator_traits = tbb::detail::allocator_traits; using queue_allocator_type = typename allocator_traits_type::template rebind_alloc; public: // must be power of 2 static constexpr size_type n_queue = 8; // Approximately n_queue/golden ratio static constexpr size_type phi = 3; static constexpr size_type item_size = micro_queue_type::item_size; static constexpr size_type items_per_page = micro_queue_type::items_per_page; concurrent_queue_rep() {} concurrent_queue_rep( const concurrent_queue_rep& ) = delete; concurrent_queue_rep& operator=( const concurrent_queue_rep& ) = delete; void clear( queue_allocator_type& alloc ) { page_allocator_type page_allocator(alloc); for (size_type i = 0; i < n_queue; ++i) { padded_page* tail_page = array[i].get_tail_page(); if( is_valid_page(tail_page) ) { __TBB_ASSERT(array[i].get_head_page() == tail_page, "at most one page should remain" ); page_allocator_traits::destroy(page_allocator, static_cast(tail_page)); page_allocator_traits::deallocate(page_allocator, static_cast(tail_page), 1); array[i].set_tail_page(nullptr); } else { __TBB_ASSERT(!is_valid_page(array[i].get_head_page()), "head page pointer corrupt?"); } } } void assign( const concurrent_queue_rep& src, queue_allocator_type& alloc, item_constructor_type construct_item ) { 
head_counter.store(src.head_counter.load(std::memory_order_relaxed), std::memory_order_relaxed); tail_counter.store(src.tail_counter.load(std::memory_order_relaxed), std::memory_order_relaxed); n_invalid_entries.store(src.n_invalid_entries.load(std::memory_order_relaxed), std::memory_order_relaxed); // copy or move micro_queues size_type queue_idx = 0; try_call( [&] { for (; queue_idx < n_queue; ++queue_idx) { array[queue_idx].assign(src.array[queue_idx], alloc, construct_item); } }).on_exception( [&] { for (size_type i = 0; i < queue_idx + 1; ++i) { array[i].clear(alloc); } head_counter.store(0, std::memory_order_relaxed); tail_counter.store(0, std::memory_order_relaxed); n_invalid_entries.store(0, std::memory_order_relaxed); }); __TBB_ASSERT(head_counter.load(std::memory_order_relaxed) == src.head_counter.load(std::memory_order_relaxed) && tail_counter.load(std::memory_order_relaxed) == src.tail_counter.load(std::memory_order_relaxed), "the source concurrent queue should not be concurrently modified." ); } bool empty() const { ticket_type tc = tail_counter.load(std::memory_order_acquire); ticket_type hc = head_counter.load(std::memory_order_relaxed); // if tc!=r.tail_counter, the queue was not empty at some point between the two reads. return tc == tail_counter.load(std::memory_order_relaxed) && std::ptrdiff_t(tc - hc - n_invalid_entries.load(std::memory_order_relaxed)) <= 0; } std::ptrdiff_t size() const { __TBB_ASSERT(sizeof(std::ptrdiff_t) <= sizeof(size_type), NULL); std::ptrdiff_t hc = head_counter.load(std::memory_order_acquire); std::ptrdiff_t tc = tail_counter.load(std::memory_order_relaxed); std::ptrdiff_t nie = n_invalid_entries.load(std::memory_order_relaxed); return tc - hc - nie; } friend class micro_queue; // Map ticket_type to an array index static size_type index( ticket_type k ) { return k * phi % n_queue; } micro_queue_type& choose( ticket_type k ) { // The formula here approximates LRU in a cache-oblivious way. 
return array[index(k)]; } alignas(max_nfs_size) micro_queue_type array[n_queue]; alignas(max_nfs_size) std::atomic head_counter{}; alignas(max_nfs_size) std::atomic tail_counter{}; alignas(max_nfs_size) std::atomic n_invalid_entries{}; }; // class concurrent_queue_rep #if _MSC_VER && !defined(__INTEL_COMPILER) #pragma warning( pop ) #endif template class concurrent_queue_iterator_base { using queue_rep_type = concurrent_queue_rep; using padded_page = typename queue_rep_type::padded_page; protected: concurrent_queue_iterator_base() = default; concurrent_queue_iterator_base( const concurrent_queue_iterator_base& other ) { assign(other); } concurrent_queue_iterator_base( queue_rep_type* queue_rep ) : my_queue_rep(queue_rep), my_head_counter(my_queue_rep->head_counter.load(std::memory_order_relaxed)) { for (std::size_t i = 0; i < queue_rep_type::n_queue; ++i) { my_array[i] = my_queue_rep->array[i].get_head_page(); } if (!get_item(my_item, my_head_counter)) advance(); } void assign( const concurrent_queue_iterator_base& other ) { my_item = other.my_item; my_queue_rep = other.my_queue_rep; if (my_queue_rep != nullptr) { my_head_counter = other.my_head_counter; for (std::size_t i = 0; i < queue_rep_type::n_queue; ++i) { my_array[i] = other.my_array[i]; } } } void advance() { __TBB_ASSERT(my_item, "Attempt to increment iterator past end of the queue"); std::size_t k = my_head_counter; #if TBB_USE_ASSERT Value* tmp; get_item(tmp, k); __TBB_ASSERT(my_item == tmp, nullptr); #endif std::size_t i = modulo_power_of_two(k / queue_rep_type::n_queue, my_queue_rep->items_per_page); if (i == my_queue_rep->items_per_page - 1) { padded_page*& root = my_array[queue_rep_type::index(k)]; root = root->next; } // Advance k my_head_counter = ++k; if (!get_item(my_item, k)) advance(); } concurrent_queue_iterator_base& operator=( const concurrent_queue_iterator_base& other ) { this->assign(other); return *this; } bool get_item( Value*& item, std::size_t k ) { if (k == 
my_queue_rep->tail_counter.load(std::memory_order_relaxed)) { item = nullptr; return true; } else { padded_page* p = my_array[queue_rep_type::index(k)]; __TBB_ASSERT(p, nullptr); std::size_t i = modulo_power_of_two(k / queue_rep_type::n_queue, my_queue_rep->items_per_page); item = &(*p)[i]; return (p->mask & uintptr_t(1) << i) != 0; } } Value* my_item{ nullptr }; queue_rep_type* my_queue_rep{ nullptr }; ticket_type my_head_counter{}; padded_page* my_array[queue_rep_type::n_queue]{}; }; // class concurrent_queue_iterator_base struct concurrent_queue_iterator_provider { template static Iterator get( const Container& container ) { return Iterator(container); } }; // struct concurrent_queue_iterator_provider template class concurrent_queue_iterator : public concurrent_queue_iterator_base::type, Allocator> { using base_type = concurrent_queue_iterator_base::type, Allocator>; public: using value_type = Value; using pointer = value_type*; using reference = value_type&; using difference_type = std::ptrdiff_t; using iterator_category = std::forward_iterator_tag; concurrent_queue_iterator() = default; /** If Value==Container::value_type, then this routine is the copy constructor. If Value==const Container::value_type, then this routine is a conversion constructor. 
*/ concurrent_queue_iterator( const concurrent_queue_iterator& other ) : base_type(other) {} private: concurrent_queue_iterator( const Container& container ) : base_type(container.my_queue_representation) {} public: concurrent_queue_iterator& operator=( const concurrent_queue_iterator& other ) { this->assign(other); return *this; } reference operator*() const { return *static_cast(this->my_item); } pointer operator->() const { return &operator*(); } concurrent_queue_iterator& operator++() { this->advance(); return *this; } concurrent_queue_iterator operator++(int) { concurrent_queue_iterator tmp = *this; ++*this; return tmp; } friend bool operator==( const concurrent_queue_iterator& lhs, const concurrent_queue_iterator& rhs ) { return lhs.my_item == rhs.my_item; } friend bool operator!=( const concurrent_queue_iterator& lhs, const concurrent_queue_iterator& rhs ) { return lhs.my_item != rhs.my_item; } private: friend struct concurrent_queue_iterator_provider; }; // class concurrent_queue_iterator } // namespace d2 } // namespace detail } // tbb #endif // __TBB_detail__concurrent_queue_base_H _concurrent_skip_list.h000066400000000000000000001336521514453371700345500ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/* Copyright (c) 2019-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #ifndef __TBB_detail__concurrent_skip_list_H #define __TBB_detail__concurrent_skip_list_H #if !defined(__TBB_concurrent_map_H) && !defined(__TBB_concurrent_set_H) #error Do not #include this internal file directly; use public TBB headers instead. #endif #include "_config.h" #include "_range_common.h" #include "_allocator_traits.h" #include "_template_helpers.h" #include "_node_handle.h" #include "_containers_helpers.h" #include "_assert.h" #include "_exception.h" #include "../enumerable_thread_specific.h" #include #include #include #include #include #include // Need std::geometric_distribution #include // Need std::equal and std::lexicographical_compare #include #if __TBB_CPP20_COMPARISONS_PRESENT #include #endif #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) #pragma warning(push) #pragma warning(disable: 4127) // warning C4127: conditional expression is constant #endif namespace tbb { namespace detail { namespace d2 { template class skip_list_node { using node_ptr = skip_list_node*; public: using value_type = Value; using atomic_node_ptr = std::atomic; using size_type = std::size_t; using container_allocator_type = Allocator; using reference = value_type&; using const_reference = const value_type&; private: using allocator_traits = tbb::detail::allocator_traits; // Allocator is the same as the container allocator=> allocates unitptr_t // It is required to rebind it to value_type to get the correct pointer and const_pointer using value_allocator_traits = typename allocator_traits::template rebind_traits; public: using pointer = typename value_allocator_traits::pointer; using const_pointer = typename value_allocator_traits::const_pointer; //In perfect world these constructor and destructor would have been private, //however this seems technically impractical due to use of allocator_traits. 
//Should not be called directly, instead use create method skip_list_node( size_type levels ) : my_height(levels), my_index_number(0) {} //Should not be called directly, instead use destroy method ~skip_list_node() {} skip_list_node( const skip_list_node& ) = delete; skip_list_node( skip_list_node&& ) = delete; skip_list_node& operator=( const skip_list_node& ) = delete; skip_list_node& operator=( skip_list_node&& ) = delete; static skip_list_node* create( container_allocator_type& alloc, size_type height ) { size_type sz = calc_node_size(height); static_assert(std::is_same::value, "skip_list_node assumes that passed in allocator operates on bytes"); auto* node = reinterpret_cast(allocator_traits::allocate(alloc, sz)); //Construct the node itself allocator_traits::construct(alloc, node, height); //Construct the level pointers for (size_type l = 0; l < height; ++l) { allocator_traits::construct(alloc, &node->get_atomic_next(l), nullptr); } return node; } static void destroy( container_allocator_type& alloc, skip_list_node* node ) { //Destroy the level pointers for (size_type l = 0; l < node->height(); ++l) { allocator_traits::destroy(alloc, &node->atomic_next(l)); } size_type sz = calc_node_size(node->height()); // Destroy the node itself allocator_traits::destroy(alloc, node); // Deallocate the node allocator_traits::deallocate(alloc, reinterpret_cast(node), sz); } pointer storage() { return &my_value; } reference value() { return *storage(); } node_ptr next( size_type level ) const { node_ptr res = get_atomic_next(level).load(std::memory_order_acquire); __TBB_ASSERT(res == nullptr || res->height() > level, "Broken internal structure"); return res; } atomic_node_ptr& atomic_next( size_type level ) { atomic_node_ptr& res = get_atomic_next(level); #if TBB_USE_DEBUG node_ptr node = res.load(std::memory_order_acquire); __TBB_ASSERT(node == nullptr || node->height() > level, "Broken internal structure"); #endif return res; } void set_next( size_type level, node_ptr n ) 
{ __TBB_ASSERT(n == nullptr || n->height() > level, "Broken internal structure"); get_atomic_next(level).store(n, std::memory_order_relaxed); } size_type height() const { return my_height; } void set_index_number( size_type index_num ) { my_index_number = index_num; } size_type index_number() const { return my_index_number; } private: static size_type calc_node_size( size_type height ) { static_assert(alignof(skip_list_node) >= alignof(atomic_node_ptr), "Incorrect alignment"); return sizeof(skip_list_node) + height * sizeof(atomic_node_ptr); } atomic_node_ptr& get_atomic_next( size_type level ) { atomic_node_ptr* arr = reinterpret_cast(this + 1); return arr[level]; } const atomic_node_ptr& get_atomic_next( size_type level ) const { const atomic_node_ptr* arr = reinterpret_cast(this + 1); return arr[level]; } union { value_type my_value; }; size_type my_height; size_type my_index_number; }; // class skip_list_node template class skip_list_iterator { using node_type = NodeType; using node_ptr = node_type*; public: using iterator_category = std::forward_iterator_tag; using value_type = ValueType; using difference_type = std::ptrdiff_t; using pointer = value_type*; using reference = value_type&; skip_list_iterator() : skip_list_iterator(nullptr) {} skip_list_iterator( const skip_list_iterator& other ) : my_node_ptr(other.my_node_ptr) {} skip_list_iterator& operator=( const skip_list_iterator& other ) { my_node_ptr = other.my_node_ptr; return *this; } reference operator*() const { return my_node_ptr->value(); } pointer operator->() const { return my_node_ptr->storage(); } skip_list_iterator& operator++() { __TBB_ASSERT(my_node_ptr != nullptr, nullptr); my_node_ptr = my_node_ptr->next(0); return *this; } skip_list_iterator operator++(int) { skip_list_iterator tmp = *this; ++*this; return tmp; } private: skip_list_iterator(node_type* n) : my_node_ptr(n) {} node_ptr my_node_ptr; template friend class concurrent_skip_list; template friend class skip_list_iterator; friend 
class const_range; friend class range; friend bool operator==( const skip_list_iterator& lhs, const skip_list_iterator& rhs ) { return lhs.my_node_ptr == rhs.my_node_ptr; } friend bool operator!=( const skip_list_iterator& lhs, const skip_list_iterator& rhs ) { return lhs.my_node_ptr != rhs.my_node_ptr; } }; // class skip_list_iterator template class concurrent_skip_list { protected: using container_traits = Traits; using self_type = concurrent_skip_list; using allocator_type = typename container_traits::allocator_type; using allocator_traits_type = tbb::detail::allocator_traits; using key_compare = typename container_traits::compare_type; using value_compare = typename container_traits::value_compare; using key_type = typename container_traits::key_type; using value_type = typename container_traits::value_type; static_assert(std::is_same::value, "value_type of the container should be the same as its allocator"); using size_type = std::size_t; using difference_type = std::ptrdiff_t; static constexpr size_type max_level = container_traits::max_level; using node_allocator_type = typename allocator_traits_type::template rebind_alloc; using node_allocator_traits = tbb::detail::allocator_traits; using list_node_type = skip_list_node; using node_type = d1::node_handle; using iterator = skip_list_iterator; using const_iterator = skip_list_iterator; using reference = value_type&; using const_reference = const value_type&; using pointer = typename allocator_traits_type::pointer; using const_pointer = typename allocator_traits_type::const_pointer; using random_level_generator_type = typename container_traits::random_level_generator_type; using node_ptr = list_node_type*; using array_type = std::array; private: template using is_transparent = dependent_bool, T>; public: static constexpr bool allow_multimapping = container_traits::allow_multimapping; concurrent_skip_list() : my_head_ptr(nullptr), my_size(0), my_max_height(0) {} explicit concurrent_skip_list( const key_compare& 
comp, const allocator_type& alloc = allocator_type() ) : my_node_allocator(alloc), my_compare(comp), my_head_ptr(nullptr), my_size(0), my_max_height(0) {} explicit concurrent_skip_list( const allocator_type& alloc ) : concurrent_skip_list(key_compare(), alloc) {} template concurrent_skip_list( InputIterator first, InputIterator last, const key_compare& comp = key_compare(), const allocator_type& alloc = allocator_type() ) : concurrent_skip_list(comp, alloc) { internal_copy(first, last); } template concurrent_skip_list( InputIterator first, InputIterator last, const allocator_type& alloc ) : concurrent_skip_list(first, last, key_compare(), alloc) {} concurrent_skip_list( std::initializer_list init, const key_compare& comp = key_compare(), const allocator_type& alloc = allocator_type() ) : concurrent_skip_list(init.begin(), init.end(), comp, alloc) {} concurrent_skip_list( std::initializer_list init, const allocator_type& alloc ) : concurrent_skip_list(init, key_compare(), alloc) {} concurrent_skip_list( const concurrent_skip_list& other ) : my_node_allocator(node_allocator_traits::select_on_container_copy_construction(other.get_allocator())), my_compare(other.my_compare), my_rng(other.my_rng), my_head_ptr(nullptr), my_size(0), my_max_height(0) { internal_copy(other); __TBB_ASSERT(my_size == other.my_size, "Wrong size of copy-constructed container"); } concurrent_skip_list( const concurrent_skip_list& other, const allocator_type& alloc ) : my_node_allocator(alloc), my_compare(other.my_compare), my_rng(other.my_rng), my_head_ptr(nullptr), my_size(0), my_max_height(0) { internal_copy(other); __TBB_ASSERT(my_size == other.my_size, "Wrong size of copy-constructed container"); } concurrent_skip_list( concurrent_skip_list&& other ) : my_node_allocator(std::move(other.my_node_allocator)), my_compare(other.my_compare), my_rng(std::move(other.my_rng)), my_head_ptr(nullptr) // my_head_ptr would be stored in internal_move { internal_move(std::move(other)); } 
concurrent_skip_list( concurrent_skip_list&& other, const allocator_type& alloc ) : my_node_allocator(alloc), my_compare(other.my_compare), my_rng(std::move(other.my_rng)), my_head_ptr(nullptr) { using is_always_equal = typename allocator_traits_type::is_always_equal; internal_move_construct_with_allocator(std::move(other), is_always_equal()); } ~concurrent_skip_list() { clear(); delete_head(); } concurrent_skip_list& operator=( const concurrent_skip_list& other ) { if (this != &other) { clear(); copy_assign_allocators(my_node_allocator, other.my_node_allocator); my_compare = other.my_compare; my_rng = other.my_rng; internal_copy(other); } return *this; } concurrent_skip_list& operator=( concurrent_skip_list&& other ) { if (this != &other) { clear(); delete_head(); my_compare = std::move(other.my_compare); my_rng = std::move(other.my_rng); move_assign_allocators(my_node_allocator, other.my_node_allocator); using pocma_type = typename node_allocator_traits::propagate_on_container_move_assignment; using is_always_equal = typename node_allocator_traits::is_always_equal; internal_move_assign(std::move(other), tbb::detail::disjunction()); } return *this; } concurrent_skip_list& operator=( std::initializer_list il ) { clear(); insert(il.begin(),il.end()); return *this; } std::pair insert( const value_type& value ) { return internal_insert(value); } std::pair insert( value_type&& value ) { return internal_insert(std::move(value)); } iterator insert( const_iterator, const_reference value ) { // Ignore hint return insert(value).first; } iterator insert( const_iterator, value_type&& value ) { // Ignore hint return insert(std::move(value)).first; } template void insert( InputIterator first, InputIterator last ) { while (first != last) { insert(*first); ++first; } } void insert( std::initializer_list init ) { insert(init.begin(), init.end()); } std::pair insert( node_type&& nh ) { if (!nh.empty()) { auto insert_node = d1::node_handle_accessor::get_node_ptr(nh); std::pair 
insert_result = internal_insert_node(insert_node); if (insert_result.second) { d1::node_handle_accessor::deactivate(nh); } return insert_result; } return std::pair(end(), false); } iterator insert( const_iterator, node_type&& nh ) { // Ignore hint return insert(std::move(nh)).first; } template std::pair emplace( Args&&... args ) { return internal_insert(std::forward(args)...); } template iterator emplace_hint( const_iterator, Args&&... args ) { // Ignore hint return emplace(std::forward(args)...).first; } iterator unsafe_erase( iterator pos ) { std::pair extract_result = internal_extract(pos); if (extract_result.first) { // node was extracted delete_value_node(extract_result.first); return extract_result.second; } return end(); } iterator unsafe_erase( const_iterator pos ) { return unsafe_erase(get_iterator(pos)); } iterator unsafe_erase( const_iterator first, const_iterator last ) { while (first != last) { // Unsafe erase returns the iterator which follows the erased one first = unsafe_erase(first); } return get_iterator(first); } size_type unsafe_erase( const key_type& key ) { return internal_erase(key); } template typename std::enable_if::value && !std::is_convertible::value && !std::is_convertible::value, size_type>::type unsafe_erase( const K& key ) { return internal_erase(key); } node_type unsafe_extract( const_iterator pos ) { std::pair extract_result = internal_extract(pos); return extract_result.first ? 
d1::node_handle_accessor::construct(extract_result.first) : node_type(); } node_type unsafe_extract( iterator pos ) { return unsafe_extract(const_iterator(pos)); } node_type unsafe_extract( const key_type& key ) { return unsafe_extract(find(key)); } template typename std::enable_if::value && !std::is_convertible::value && !std::is_convertible::value, node_type>::type unsafe_extract( const K& key ) { return unsafe_extract(find(key)); } iterator lower_bound( const key_type& key ) { return iterator(internal_get_bound(key, my_compare)); } const_iterator lower_bound( const key_type& key ) const { return const_iterator(internal_get_bound(key, my_compare)); } template typename std::enable_if::value, iterator>::type lower_bound( const K& key ) { return iterator(internal_get_bound(key, my_compare)); } template typename std::enable_if::value, const_iterator>::type lower_bound( const K& key ) const { return const_iterator(internal_get_bound(key, my_compare)); } iterator upper_bound( const key_type& key ) { return iterator(internal_get_bound(key, not_greater_compare(my_compare))); } const_iterator upper_bound( const key_type& key ) const { return const_iterator(internal_get_bound(key, not_greater_compare(my_compare))); } template typename std::enable_if::value, iterator>::type upper_bound( const K& key ) { return iterator(internal_get_bound(key, not_greater_compare(my_compare))); } template typename std::enable_if::value, const_iterator>::type upper_bound( const K& key ) const { return const_iterator(internal_get_bound(key, not_greater_compare(my_compare))); } iterator find( const key_type& key ) { return iterator(internal_find(key)); } const_iterator find( const key_type& key ) const { return const_iterator(internal_find(key)); } template typename std::enable_if::value, iterator>::type find( const K& key ) { return iterator(internal_find(key)); } template typename std::enable_if::value, const_iterator>::type find( const K& key ) const { return 
const_iterator(internal_find(key)); } size_type count( const key_type& key ) const { return internal_count(key); } template typename std::enable_if::value, size_type>::type count( const K& key ) const { return internal_count(key); } bool contains( const key_type& key ) const { return find(key) != end(); } template typename std::enable_if::value, bool>::type contains( const K& key ) const { return find(key) != end(); } void clear() noexcept { // clear is not thread safe - load can be relaxed node_ptr head = my_head_ptr.load(std::memory_order_relaxed); if (head == nullptr) return; // Head is not allocated => container is empty node_ptr current = head->next(0); // Delete all value nodes in the container while (current) { node_ptr next = current->next(0); delete_value_node(current); current = next; } for (size_type level = 0; level < head->height(); ++level) { head->set_next(level, nullptr); } my_size.store(0, std::memory_order_relaxed); my_max_height.store(0, std::memory_order_relaxed); } iterator begin() { return iterator(internal_begin()); } const_iterator begin() const { return const_iterator(internal_begin()); } const_iterator cbegin() const { return const_iterator(internal_begin()); } iterator end() { return iterator(nullptr); } const_iterator end() const { return const_iterator(nullptr); } const_iterator cend() const { return const_iterator(nullptr); } size_type size() const { return my_size.load(std::memory_order_relaxed); } size_type max_size() const { return node_allocator_traits::max_size(my_node_allocator); } __TBB_nodiscard bool empty() const { return 0 == size(); } allocator_type get_allocator() const { return my_node_allocator; } void swap(concurrent_skip_list& other) { if (this != &other) { using pocs_type = typename node_allocator_traits::propagate_on_container_swap; using is_always_equal = typename node_allocator_traits::is_always_equal; internal_swap(other, tbb::detail::disjunction()); } } std::pair equal_range(const key_type& key) { return 
internal_equal_range(key); } std::pair equal_range(const key_type& key) const { return internal_equal_range(key); } template typename std::enable_if::value, std::pair>::type equal_range( const K& key ) { return internal_equal_range(key); } template typename std::enable_if::value, std::pair>::type equal_range( const K& key ) const { return internal_equal_range(key); } key_compare key_comp() const { return my_compare; } value_compare value_comp() const { return container_traits::value_comp(my_compare); } class const_range_type { public: using size_type = typename concurrent_skip_list::size_type; using difference_type = typename concurrent_skip_list::difference_type; using iterator = typename concurrent_skip_list::const_iterator; using value_type = typename iterator::value_type; using reference = typename iterator::reference; bool empty() const { return my_begin.my_node_ptr ? (my_begin.my_node_ptr->next(0) == my_end.my_node_ptr) : true; } bool is_divisible() const { return my_begin.my_node_ptr && my_level != 0 ? my_begin.my_node_ptr->next(my_level - 1) != my_end.my_node_ptr : false; } size_type size() const { return std::distance(my_begin, my_end); } const_range_type( const_range_type& r, split) : my_end(r.my_end) { if (r.empty()) { __TBB_ASSERT(my_end.my_node_ptr == nullptr, nullptr); my_begin = my_end; my_level = 0; } else { my_begin = iterator(r.my_begin.my_node_ptr->next(r.my_level - 1)); my_level = my_begin.my_node_ptr->height(); } r.my_end = my_begin; } const_range_type( const concurrent_skip_list& l) : my_end(l.end()), my_begin(l.begin()), my_level(my_begin.my_node_ptr ? 
my_begin.my_node_ptr->height() : 0) {} iterator begin() const { return my_begin; } iterator end() const { return my_end; } size_type grainsize() const { return 1; } private: const_iterator my_end; const_iterator my_begin; size_type my_level; }; // class const_range_type class range_type : public const_range_type { public: using iterator = typename concurrent_skip_list::iterator; using value_type = typename iterator::value_type; using reference = typename iterator::reference; range_type(range_type& r, split) : const_range_type(r, split()) {} range_type(const concurrent_skip_list& l) : const_range_type(l) {} iterator begin() const { node_ptr node = const_range_type::begin().my_node_ptr; return iterator(node); } iterator end() const { node_ptr node = const_range_type::end().my_node_ptr; return iterator(node); } }; // class range_type range_type range() { return range_type(*this); } const_range_type range() const { return const_range_type(*this); } private: node_ptr internal_begin() const { node_ptr head = get_head(); return head == nullptr ? 
head : head->next(0); } void internal_move(concurrent_skip_list&& other) { my_head_ptr.store(other.my_head_ptr.load(std::memory_order_relaxed), std::memory_order_relaxed); other.my_head_ptr.store(nullptr, std::memory_order_relaxed); my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); other.my_size.store(0, std::memory_order_relaxed); my_max_height.store(other.my_max_height.load(std::memory_order_relaxed), std::memory_order_relaxed); other.my_max_height.store(0, std::memory_order_relaxed); } void internal_move_construct_with_allocator(concurrent_skip_list&& other, /*is_always_equal = */std::true_type) { internal_move(std::move(other)); } void internal_move_construct_with_allocator(concurrent_skip_list&& other, /*is_always_equal = */std::false_type) { if (my_node_allocator == other.get_allocator()) { internal_move(std::move(other)); } else { my_size.store(0, std::memory_order_relaxed); my_max_height.store(other.my_max_height.load(std::memory_order_relaxed), std::memory_order_relaxed); internal_copy(std::make_move_iterator(other.begin()), std::make_move_iterator(other.end())); } } static const key_type& get_key( node_ptr n ) { __TBB_ASSERT(n, nullptr); return container_traits::get_key(static_cast(n)->value()); } template bool found( node_ptr node, const K& key ) const { return node != nullptr && !my_compare(key, get_key(node)); } template node_ptr internal_find(const K& key) const { return allow_multimapping ? 
internal_find_multi(key) : internal_find_unique(key); } template node_ptr internal_find_multi( const K& key ) const { node_ptr prev = get_head(); if (prev == nullptr) return nullptr; // If the head node is not allocated - exit node_ptr curr = nullptr; node_ptr old_curr = curr; for (size_type h = my_max_height.load(std::memory_order_acquire); h > 0; --h) { curr = internal_find_position(h - 1, prev, key, my_compare); if (curr != old_curr && found(curr, key)) { return curr; } old_curr = curr; } return nullptr; } template node_ptr internal_find_unique( const K& key ) const { const_iterator it = lower_bound(key); return (it == end() || my_compare(key, container_traits::get_key(*it))) ? nullptr : it.my_node_ptr; } template size_type internal_count( const K& key ) const { if (allow_multimapping) { // TODO: reimplement without double traversal std::pair r = equal_range(key); return std::distance(r.first, r.second); } return size_type(contains(key) ? 1 : 0); } template std::pair internal_equal_range(const K& key) const { iterator lb = get_iterator(lower_bound(key)); auto result = std::make_pair(lb, lb); // If the lower bound points to the node with the requested key if (found(lb.my_node_ptr, key)) { if (!allow_multimapping) { // For unique containers - move the second iterator forward and exit ++result.second; } else { // For multi containers - find the upper bound starting from the lower bound node_ptr prev = lb.my_node_ptr; node_ptr curr = nullptr; not_greater_compare cmp(my_compare); // Start from the lower bound of the range for (size_type h = prev->height(); h > 0; --h) { curr = prev->next(h - 1); while (curr && cmp(get_key(curr), key)) { prev = curr; // If the height of the next node is greater than the current one - jump to its height if (h < curr->height()) { h = curr->height(); } curr = prev->next(h - 1); } } result.second = iterator(curr); } } return result; } // Finds position on the level using comparator cmp starting from the node prev template node_ptr 
internal_find_position( size_type level, node_ptr& prev, const K& key, const Comparator& cmp ) const { __TBB_ASSERT(level < prev->height(), "Wrong level to find position"); node_ptr curr = prev->next(level); while (curr && cmp(get_key(curr), key)) { prev = curr; __TBB_ASSERT(level < prev->height(), nullptr); curr = prev->next(level); } return curr; } // The same as previous overload, but allows index_number comparison template node_ptr internal_find_position( size_type level, node_ptr& prev, node_ptr node, const Comparator& cmp ) const { __TBB_ASSERT(level < prev->height(), "Wrong level to find position"); node_ptr curr = prev->next(level); while (curr && cmp(get_key(curr), get_key(node))) { if (allow_multimapping && cmp(get_key(node), get_key(curr)) && curr->index_number() > node->index_number()) { break; } prev = curr; __TBB_ASSERT(level < prev->height(), nullptr); curr = prev->next(level); } return curr; } template void fill_prev_curr_arrays(array_type& prev_nodes, array_type& curr_nodes, node_ptr node, const key_type& key, const Comparator& cmp, node_ptr head ) { size_type curr_max_height = my_max_height.load(std::memory_order_acquire); size_type node_height = node->height(); if (curr_max_height < node_height) { std::fill(prev_nodes.begin() + curr_max_height, prev_nodes.begin() + node_height, head); std::fill(curr_nodes.begin() + curr_max_height, curr_nodes.begin() + node_height, nullptr); } node_ptr prev = head; for (size_type level = curr_max_height; level > 0; --level) { node_ptr curr = internal_find_position(level - 1, prev, key, cmp); prev_nodes[level - 1] = prev; curr_nodes[level - 1] = curr; } } void fill_prev_array_for_existing_node( array_type& prev_nodes, node_ptr node ) { node_ptr head = create_head_if_necessary(); prev_nodes.fill(head); node_ptr prev = head; for (size_type level = node->height(); level > 0; --level) { while (prev->next(level - 1) != node) { prev = prev->next(level - 1); } prev_nodes[level - 1] = prev; } } struct not_greater_compare 
{ const key_compare& my_less_compare; not_greater_compare( const key_compare& less_compare ) : my_less_compare(less_compare) {} template bool operator()( const K1& first, const K2& second ) const { return !my_less_compare(second, first); } }; not_greater_compare select_comparator( /*allow_multimapping = */ std::true_type ) { return not_greater_compare(my_compare); } key_compare select_comparator( /*allow_multimapping = */ std::false_type ) { return my_compare; } template std::pair internal_insert( Args&&... args ) { node_ptr new_node = create_value_node(std::forward(args)...); std::pair insert_result = internal_insert_node(new_node); if (!insert_result.second) { delete_value_node(new_node); } return insert_result; } std::pair internal_insert_node( node_ptr new_node ) { array_type prev_nodes; array_type curr_nodes; size_type new_height = new_node->height(); auto compare = select_comparator(std::integral_constant{}); node_ptr head_node = create_head_if_necessary(); for (;;) { fill_prev_curr_arrays(prev_nodes, curr_nodes, new_node, get_key(new_node), compare, head_node); node_ptr prev = prev_nodes[0]; node_ptr next = curr_nodes[0]; if (allow_multimapping) { new_node->set_index_number(prev->index_number() + 1); } else { if (found(next, get_key(new_node))) { return std::pair(iterator(next), false); } } new_node->set_next(0, next); if (!prev->atomic_next(0).compare_exchange_strong(next, new_node)) { continue; } // If the node was successfully linked on the first level - it will be linked on other levels // Insertion cannot fail starting from this point // If the height of inserted node is greater than maximum - increase maximum size_type max_height = my_max_height.load(std::memory_order_acquire); for (;;) { if (new_height <= max_height || my_max_height.compare_exchange_strong(max_height, new_height)) { // If the maximum was successfully updated by current thread // or by an other thread for the value, greater or equal to new_height break; } } for (std::size_t level = 1; 
level < new_height; ++level) { // Link the node on upper levels for (;;) { prev = prev_nodes[level]; next = static_cast(curr_nodes[level]); new_node->set_next(level, next); __TBB_ASSERT(new_node->height() > level, "Internal structure break"); if (prev->atomic_next(level).compare_exchange_strong(next, new_node)) { break; } for (size_type lev = level; lev != new_height; ++lev ) { curr_nodes[lev] = internal_find_position(lev, prev_nodes[lev], new_node, compare); } } } ++my_size; return std::pair(iterator(new_node), true); } } template node_ptr internal_get_bound( const K& key, const Comparator& cmp ) const { node_ptr prev = get_head(); if (prev == nullptr) return nullptr; // If the head node is not allocated - exit node_ptr curr = nullptr; for (size_type h = my_max_height.load(std::memory_order_acquire); h > 0; --h) { curr = internal_find_position(h - 1, prev, key, cmp); } return curr; } template size_type internal_erase( const K& key ) { auto eq = equal_range(key); size_type old_size = size(); unsafe_erase(eq.first, eq.second); return old_size - size(); } // Returns node_ptr to the extracted node and node_ptr to the next node after the extracted std::pair internal_extract( const_iterator it ) { std::pair result(nullptr, nullptr); if ( it != end() ) { array_type prev_nodes; node_ptr erase_node = it.my_node_ptr; node_ptr next_node = erase_node->next(0); fill_prev_array_for_existing_node(prev_nodes, erase_node); for (size_type level = 0; level < erase_node->height(); ++level) { prev_nodes[level]->set_next(level, erase_node->next(level)); erase_node->set_next(level, nullptr); } my_size.fetch_sub(1, std::memory_order_relaxed); result.first = erase_node; result.second = next_node; } return result; } protected: template void internal_merge( SourceType&& source ) { using source_type = typename std::decay::type; using source_iterator = typename source_type::iterator; static_assert((std::is_same::value), "Incompatible containers cannot be merged"); for (source_iterator it = 
source.begin(); it != source.end();) { source_iterator where = it++; if (allow_multimapping || !contains(container_traits::get_key(*where))) { node_type handle = source.unsafe_extract(where); __TBB_ASSERT(!handle.empty(), "Extracted handle in merge is empty"); if (!insert(std::move(handle)).second) { //If the insertion fails - return the node into source source.insert(std::move(handle)); } __TBB_ASSERT(handle.empty(), "Node handle should be empty after the insertion"); } } } private: void internal_copy( const concurrent_skip_list& other ) { internal_copy(other.begin(), other.end()); } template void internal_copy( Iterator first, Iterator last ) { try_call([&] { for (auto it = first; it != last; ++it) { insert(*it); } }).on_exception([&] { clear(); delete_head(); }); } node_ptr create_node( size_type height ) { return list_node_type::create(my_node_allocator, height); } template node_ptr create_value_node( Args&&... args ) { node_ptr node = create_node(my_rng()); // try_call API is not convenient here due to broken // variadic capture on GCC 4.8.5 auto value_guard = make_raii_guard([&] { delete_node(node); }); // Construct the value inside the node node_allocator_traits::construct(my_node_allocator, node->storage(), std::forward(args)...); value_guard.dismiss(); return node; } node_ptr create_head_node() { return create_node(max_level); } void delete_head() { node_ptr head = my_head_ptr.load(std::memory_order_relaxed); if (head != nullptr) { delete_node(head); my_head_ptr.store(nullptr, std::memory_order_relaxed); } } void delete_node( node_ptr node ) { list_node_type::destroy(my_node_allocator, node); } void delete_value_node( node_ptr node ) { // Destroy the value inside the node node_allocator_traits::destroy(my_node_allocator, node->storage()); delete_node(node); } node_ptr get_head() const { return my_head_ptr.load(std::memory_order_acquire); } node_ptr create_head_if_necessary() { node_ptr current_head = get_head(); if (current_head == nullptr) { // Head node 
was not created - create it node_ptr new_head = create_head_node(); if (my_head_ptr.compare_exchange_strong(current_head, new_head)) { current_head = new_head; } else { // If an other thread has already created the head node - destroy new_head // current_head now points to the actual head node delete_node(new_head); } } __TBB_ASSERT(my_head_ptr.load(std::memory_order_relaxed) != nullptr, nullptr); __TBB_ASSERT(current_head != nullptr, nullptr); return current_head; } static iterator get_iterator( const_iterator it ) { return iterator(it.my_node_ptr); } void internal_move_assign( concurrent_skip_list&& other, /*POCMA || is_always_equal =*/std::true_type ) { internal_move(std::move(other)); } void internal_move_assign( concurrent_skip_list&& other, /*POCMA || is_always_equal =*/std::false_type ) { if (my_node_allocator == other.my_node_allocator) { internal_move(std::move(other)); } else { internal_copy(std::make_move_iterator(other.begin()), std::make_move_iterator(other.end())); } } void internal_swap_fields( concurrent_skip_list& other ) { using std::swap; swap_allocators(my_node_allocator, other.my_node_allocator); swap(my_compare, other.my_compare); swap(my_rng, other.my_rng); swap_atomics_relaxed(my_head_ptr, other.my_head_ptr); swap_atomics_relaxed(my_size, other.my_size); swap_atomics_relaxed(my_max_height, other.my_max_height); } void internal_swap( concurrent_skip_list& other, /*POCMA || is_always_equal =*/std::true_type ) { internal_swap_fields(other); } void internal_swap( concurrent_skip_list& other, /*POCMA || is_always_equal =*/std::false_type ) { __TBB_ASSERT(my_node_allocator == other.my_node_allocator, "Swapping with unequal allocators is not allowed"); internal_swap_fields(other); } node_allocator_type my_node_allocator; key_compare my_compare; random_level_generator_type my_rng; std::atomic my_head_ptr; std::atomic my_size; std::atomic my_max_height; template friend class concurrent_skip_list; }; // class concurrent_skip_list template bool 
operator==( const concurrent_skip_list& lhs, const concurrent_skip_list& rhs ) { if (lhs.size() != rhs.size()) return false; #if _MSC_VER // Passing "unchecked" iterators to std::equal with 3 parameters // causes compiler warnings. // The workaround is to use overload with 4 parameters, which is // available since C++14 - minimally supported version on MSVC return std::equal(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); #else return std::equal(lhs.begin(), lhs.end(), rhs.begin()); #endif } #if !__TBB_CPP20_COMPARISONS_PRESENT template bool operator!=( const concurrent_skip_list& lhs, const concurrent_skip_list& rhs ) { return !(lhs == rhs); } #endif #if __TBB_CPP20_COMPARISONS_PRESENT && __TBB_CPP20_CONCEPTS_PRESENT template tbb::detail::synthesized_three_way_result operator<=>( const concurrent_skip_list& lhs, const concurrent_skip_list& rhs ) { return std::lexicographical_compare_three_way(lhs.begin(), lhs.end(), rhs.begin(), rhs.end(), tbb::detail::synthesized_three_way_comparator{}); } #else template bool operator<( const concurrent_skip_list& lhs, const concurrent_skip_list& rhs ) { return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); } template bool operator>( const concurrent_skip_list& lhs, const concurrent_skip_list& rhs ) { return rhs < lhs; } template bool operator<=( const concurrent_skip_list& lhs, const concurrent_skip_list& rhs ) { return !(rhs < lhs); } template bool operator>=( const concurrent_skip_list& lhs, const concurrent_skip_list& rhs ) { return !(lhs < rhs); } #endif // __TBB_CPP20_COMPARISONS_PRESENT && __TBB_CPP20_CONCEPTS_PRESENT // Generates a number from the interval [0, MaxLevel). 
template class concurrent_geometric_level_generator { public: static constexpr std::size_t max_level = MaxLevel; // TODO: modify the algorithm to accept other values of max_level static_assert(max_level == 32, "Incompatible max_level for rng"); concurrent_geometric_level_generator() : engines(std::minstd_rand::result_type(time(nullptr))) {} std::size_t operator()() { // +1 is required to pass at least 1 into log2 (log2(0) is undefined) // -1 is required to have an ability to return 0 from the generator (max_level - log2(2^31) - 1) std::size_t result = max_level - std::size_t(tbb::detail::log2(engines.local()() + 1)) - 1; __TBB_ASSERT(result <= max_level, nullptr); return result; } private: tbb::enumerable_thread_specific engines; }; } // namespace d2 } // namespace detail } // namespace tbb #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) #pragma warning(pop) // warning 4127 is back #endif #endif // __TBB_detail__concurrent_skip_list_H _concurrent_unordered_base.h000066400000000000000000001767431514453371700355400ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/* Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_detail__concurrent_unordered_base_H #define __TBB_detail__concurrent_unordered_base_H #if !defined(__TBB_concurrent_unordered_map_H) && !defined(__TBB_concurrent_unordered_set_H) #error Do not #include this internal file directly; use public TBB headers instead. 
#endif #include "_range_common.h" #include "_containers_helpers.h" #include "_segment_table.h" #include "_hash_compare.h" #include "_allocator_traits.h" #include "_node_handle.h" #include "_assert.h" #include "_utils.h" #include "_exception.h" #include #include #include #include #include #include #include #include #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) #pragma warning(push) #pragma warning(disable: 4127) // warning C4127: conditional expression is constant #endif namespace tbb { namespace detail { namespace d1 { template class concurrent_unordered_base; template class solist_iterator { private: using node_ptr = typename Container::value_node_ptr; template friend class split_ordered_list; template friend class solist_iterator; template friend class concurrent_unordered_base; template friend bool operator==( const solist_iterator& i, const solist_iterator& j ); template friend bool operator!=( const solist_iterator& i, const solist_iterator& j ); public: using value_type = Value; using difference_type = typename Container::difference_type; using pointer = value_type*; using reference = value_type&; using iterator_category = std::forward_iterator_tag; solist_iterator() : my_node_ptr(nullptr) {} solist_iterator( const solist_iterator& other ) : my_node_ptr(other.my_node_ptr) {} solist_iterator& operator=( const solist_iterator& other ) { my_node_ptr = other.my_node_ptr; return *this; } reference operator*() const { return my_node_ptr->value(); } pointer operator->() const { return my_node_ptr->storage(); } solist_iterator& operator++() { auto next_node = my_node_ptr->next(); while(next_node && next_node->is_dummy()) { next_node = next_node->next(); } my_node_ptr = static_cast(next_node); return *this; } solist_iterator operator++(int) { solist_iterator tmp = *this; ++*this; return tmp; } private: solist_iterator( node_ptr pnode ) : my_node_ptr(pnode) {} node_ptr get_node_ptr() const { return my_node_ptr; } node_ptr my_node_ptr; }; template bool operator==( 
const solist_iterator& i, const solist_iterator& j ) { return i.my_node_ptr == j.my_node_ptr; } template bool operator!=( const solist_iterator& i, const solist_iterator& j ) { return i.my_node_ptr != j.my_node_ptr; } template class list_node { public: using node_ptr = list_node*; using sokey_type = SokeyType; list_node(sokey_type key) : my_next(nullptr), my_order_key(key) {} void init( sokey_type key ) { my_order_key = key; } sokey_type order_key() const { return my_order_key; } bool is_dummy() { // The last bit of order key is unset for dummy nodes return (my_order_key & 0x1) == 0; } node_ptr next() const { return my_next.load(std::memory_order_acquire); } void set_next( node_ptr next_node ) { my_next.store(next_node, std::memory_order_release); } bool try_set_next( node_ptr expected_next, node_ptr new_next ) { return my_next.compare_exchange_strong(expected_next, new_next); } private: std::atomic my_next; sokey_type my_order_key; }; // class list_node template class value_node : public list_node { public: using base_type = list_node; using sokey_type = typename base_type::sokey_type; using value_type = ValueType; value_node( sokey_type ord_key ) : base_type(ord_key) {} ~value_node() {} value_type* storage() { return reinterpret_cast(&my_value); } value_type& value() { return *storage(); } private: using aligned_storage_type = typename std::aligned_storage::type; aligned_storage_type my_value; }; // class value_node template class concurrent_unordered_base { using self_type = concurrent_unordered_base; using traits_type = Traits; using hash_compare_type = typename traits_type::hash_compare_type; class unordered_segment_table; public: using value_type = typename traits_type::value_type; using key_type = typename traits_type::key_type; using allocator_type = typename traits_type::allocator_type; private: using allocator_traits_type = tbb::detail::allocator_traits; // TODO: check assert conditions for different C++ standards static_assert(std::is_same::value, 
"value_type of the container must be the same as its allocator"); using sokey_type = std::size_t; public: using size_type = std::size_t; using difference_type = std::ptrdiff_t; using iterator = solist_iterator; using const_iterator = solist_iterator; using local_iterator = iterator; using const_local_iterator = const_iterator; using reference = value_type&; using const_reference = const value_type&; using pointer = typename allocator_traits_type::pointer; using const_pointer = typename allocator_traits_type::const_pointer; using hasher = typename hash_compare_type::hasher; using key_equal = typename hash_compare_type::key_equal; private: using list_node_type = list_node; using value_node_type = value_node; using node_ptr = list_node_type*; using value_node_ptr = value_node_type*; using value_node_allocator_type = typename allocator_traits_type::template rebind_alloc; using node_allocator_type = typename allocator_traits_type::template rebind_alloc; using node_allocator_traits = tbb::detail::allocator_traits; using value_node_allocator_traits = tbb::detail::allocator_traits; static constexpr size_type round_up_to_power_of_two( size_type bucket_count ) { return size_type(1) << size_type(tbb::detail::log2(uintptr_t(bucket_count == 0 ? 
1 : bucket_count) * 2 - 1)); } template using is_transparent = dependent_bool, T>; public: using node_type = node_handle; explicit concurrent_unordered_base( size_type bucket_count, const hasher& hash = hasher(), const key_equal& equal = key_equal(), const allocator_type& alloc = allocator_type() ) : my_size(0), my_bucket_count(round_up_to_power_of_two(bucket_count)), my_max_load_factor(float(initial_max_load_factor)), my_hash_compare(hash, equal), my_head(sokey_type(0)), my_segments(alloc) {} concurrent_unordered_base() : concurrent_unordered_base(initial_bucket_count) {} concurrent_unordered_base( size_type bucket_count, const allocator_type& alloc ) : concurrent_unordered_base(bucket_count, hasher(), key_equal(), alloc) {} concurrent_unordered_base( size_type bucket_count, const hasher& hash, const allocator_type& alloc ) : concurrent_unordered_base(bucket_count, hash, key_equal(), alloc) {} explicit concurrent_unordered_base( const allocator_type& alloc ) : concurrent_unordered_base(initial_bucket_count, hasher(), key_equal(), alloc) {} template concurrent_unordered_base( InputIterator first, InputIterator last, size_type bucket_count = initial_bucket_count, const hasher& hash = hasher(), const key_equal& equal = key_equal(), const allocator_type& alloc = allocator_type() ) : concurrent_unordered_base(bucket_count, hash, equal, alloc) { insert(first, last); } template concurrent_unordered_base( InputIterator first, InputIterator last, size_type bucket_count, const allocator_type& alloc ) : concurrent_unordered_base(first, last, bucket_count, hasher(), key_equal(), alloc) {} template concurrent_unordered_base( InputIterator first, InputIterator last, size_type bucket_count, const hasher& hash, const allocator_type& alloc ) : concurrent_unordered_base(first, last, bucket_count, hash, key_equal(), alloc) {} concurrent_unordered_base( const concurrent_unordered_base& other ) : my_size(other.my_size.load(std::memory_order_relaxed)), 
my_bucket_count(other.my_bucket_count.load(std::memory_order_relaxed)), my_max_load_factor(other.my_max_load_factor), my_hash_compare(other.my_hash_compare), my_head(other.my_head.order_key()), my_segments(other.my_segments) { try_call( [&] { internal_copy(other); } ).on_exception( [&] { clear(); }); } concurrent_unordered_base( const concurrent_unordered_base& other, const allocator_type& alloc ) : my_size(other.my_size.load(std::memory_order_relaxed)), my_bucket_count(other.my_bucket_count.load(std::memory_order_relaxed)), my_max_load_factor(other.my_max_load_factor), my_hash_compare(other.my_hash_compare), my_head(other.my_head.order_key()), my_segments(other.my_segments, alloc) { try_call( [&] { internal_copy(other); } ).on_exception( [&] { clear(); }); } concurrent_unordered_base( concurrent_unordered_base&& other ) : my_size(other.my_size.load(std::memory_order_relaxed)), my_bucket_count(other.my_bucket_count.load(std::memory_order_relaxed)), my_max_load_factor(std::move(other.my_max_load_factor)), my_hash_compare(std::move(other.my_hash_compare)), my_head(other.my_head.order_key()), my_segments(std::move(other.my_segments)) { move_content(std::move(other)); } concurrent_unordered_base( concurrent_unordered_base&& other, const allocator_type& alloc ) : my_size(other.my_size.load(std::memory_order_relaxed)), my_bucket_count(other.my_bucket_count.load(std::memory_order_relaxed)), my_max_load_factor(std::move(other.my_max_load_factor)), my_hash_compare(std::move(other.my_hash_compare)), my_head(other.my_head.order_key()), my_segments(std::move(other.my_segments), alloc) { using is_always_equal = typename allocator_traits_type::is_always_equal; internal_move_construct_with_allocator(std::move(other), alloc, is_always_equal()); } concurrent_unordered_base( std::initializer_list init, size_type bucket_count = initial_bucket_count, const hasher& hash = hasher(), const key_equal& equal = key_equal(), const allocator_type& alloc = allocator_type() ) : 
concurrent_unordered_base(init.begin(), init.end(), bucket_count, hash, equal, alloc) {} concurrent_unordered_base( std::initializer_list init, size_type bucket_count, const allocator_type& alloc ) : concurrent_unordered_base(init, bucket_count, hasher(), key_equal(), alloc) {} concurrent_unordered_base( std::initializer_list init, size_type bucket_count, const hasher& hash, const allocator_type& alloc ) : concurrent_unordered_base(init, bucket_count, hash, key_equal(), alloc) {} ~concurrent_unordered_base() { internal_clear(); } concurrent_unordered_base& operator=( const concurrent_unordered_base& other ) { if (this != &other) { clear(); my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); my_bucket_count.store(other.my_bucket_count.load(std::memory_order_relaxed), std::memory_order_relaxed); my_max_load_factor = other.my_max_load_factor; my_hash_compare = other.my_hash_compare; my_segments = other.my_segments; internal_copy(other); // TODO: guards for exceptions? 
} return *this; } concurrent_unordered_base& operator=( concurrent_unordered_base&& other ) noexcept(unordered_segment_table::is_noexcept_assignment) { if (this != &other) { clear(); my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); my_bucket_count.store(other.my_bucket_count.load(std::memory_order_relaxed), std::memory_order_relaxed); my_max_load_factor = std::move(other.my_max_load_factor); my_hash_compare = std::move(other.my_hash_compare); my_segments = std::move(other.my_segments); using pocma_type = typename allocator_traits_type::propagate_on_container_move_assignment; using is_always_equal = typename allocator_traits_type::is_always_equal; internal_move_assign(std::move(other), tbb::detail::disjunction()); } return *this; } concurrent_unordered_base& operator=( std::initializer_list init ) { clear(); insert(init); return *this; } void swap( concurrent_unordered_base& other ) noexcept(unordered_segment_table::is_noexcept_swap) { if (this != &other) { using pocs_type = typename allocator_traits_type::propagate_on_container_swap; using is_always_equal = typename allocator_traits_type::is_always_equal; internal_swap(other, tbb::detail::disjunction()); } } allocator_type get_allocator() const noexcept { return my_segments.get_allocator(); } iterator begin() noexcept { return iterator(first_value_node(&my_head)); } const_iterator begin() const noexcept { return const_iterator(first_value_node(const_cast(&my_head))); } const_iterator cbegin() const noexcept { return const_iterator(first_value_node(const_cast(&my_head))); } iterator end() noexcept { return iterator(nullptr); } const_iterator end() const noexcept { return const_iterator(nullptr); } const_iterator cend() const noexcept { return const_iterator(nullptr); } __TBB_nodiscard bool empty() const noexcept { return size() == 0; } size_type size() const noexcept { return my_size.load(std::memory_order_relaxed); } size_type max_size() const noexcept { return 
allocator_traits_type::max_size(get_allocator()); } void clear() noexcept { internal_clear(); } std::pair insert( const value_type& value ) { return internal_insert_value(value); } std::pair insert( value_type&& value ) { return internal_insert_value(std::move(value)); } iterator insert( const_iterator, const value_type& value ) { // Ignore hint return insert(value).first; } iterator insert( const_iterator, value_type&& value ) { // Ignore hint return insert(std::move(value)).first; } template void insert( InputIterator first, InputIterator last ) { for (; first != last; ++first) { insert(*first); } } void insert( std::initializer_list init ) { insert(init.begin(), init.end()); } std::pair insert( node_type&& nh ) { if (!nh.empty()) { value_node_ptr insert_node = node_handle_accessor::get_node_ptr(nh); auto init_node = [&insert_node]( sokey_type order_key )->value_node_ptr { insert_node->init(order_key); return insert_node; }; auto insert_result = internal_insert(insert_node->value(), init_node); if (insert_result.inserted) { // If the insertion succeeded - set node handle to the empty state __TBB_ASSERT(insert_result.remaining_node == nullptr, "internal_insert_node should not return the remaining node if the insertion succeeded"); node_handle_accessor::deactivate(nh); } return { iterator(insert_result.node_with_equal_key), insert_result.inserted }; } return {end(), false}; } iterator insert( const_iterator, node_type&& nh ) { // Ignore hint return insert(std::move(nh)).first; } template std::pair emplace( Args&&... 
args ) { // Create a node with temporary order_key 0, which will be reinitialize // in internal_insert after the hash calculation value_node_ptr insert_node = create_node(0, std::forward(args)...); auto init_node = [&insert_node]( sokey_type order_key )->value_node_ptr { insert_node->init(order_key); return insert_node; }; auto insert_result = internal_insert(insert_node->value(), init_node); if (!insert_result.inserted) { // If the insertion failed - destroy the node which was created insert_node->init(split_order_key_regular(1)); destroy_node(insert_node); } return { iterator(insert_result.node_with_equal_key), insert_result.inserted }; } template iterator emplace_hint( const_iterator, Args&&... args ) { // Ignore hint return emplace(std::forward(args)...).first; } iterator unsafe_erase( const_iterator pos ) { return iterator(first_value_node(internal_erase(pos.get_node_ptr()))); } iterator unsafe_erase( iterator pos ) { return iterator(first_value_node(internal_erase(pos.get_node_ptr()))); } iterator unsafe_erase( const_iterator first, const_iterator last ) { while(first != last) { first = unsafe_erase(first); } return iterator(first.get_node_ptr()); } size_type unsafe_erase( const key_type& key ) { return internal_erase_by_key(key); } template typename std::enable_if::value && !std::is_convertible::value && !std::is_convertible::value, size_type>::type unsafe_erase( const K& key ) { return internal_erase_by_key(key); } node_type unsafe_extract( const_iterator pos ) { internal_extract(pos.get_node_ptr()); return node_handle_accessor::construct(pos.get_node_ptr()); } node_type unsafe_extract( iterator pos ) { internal_extract(pos.get_node_ptr()); return node_handle_accessor::construct(pos.get_node_ptr()); } node_type unsafe_extract( const key_type& key ) { iterator item = find(key); return item == end() ? 
node_type() : unsafe_extract(item); } template typename std::enable_if::value && !std::is_convertible::value && !std::is_convertible::value, node_type>::type unsafe_extract( const K& key ) { iterator item = find(key); return item == end() ? node_type() : unsafe_extract(item); } // Lookup functions iterator find( const key_type& key ) { value_node_ptr result = internal_find(key); return result == nullptr ? end() : iterator(result); } const_iterator find( const key_type& key ) const { value_node_ptr result = const_cast(this)->internal_find(key); return result == nullptr ? end() : const_iterator(result); } template typename std::enable_if::value, iterator>::type find( const K& key ) { value_node_ptr result = internal_find(key); return result == nullptr ? end() : iterator(result); } template typename std::enable_if::value, const_iterator>::type find( const K& key ) const { value_node_ptr result = const_cast(this)->internal_find(key); return result == nullptr ? end() : const_iterator(result); } std::pair equal_range( const key_type& key ) { auto result = internal_equal_range(key); return std::make_pair(iterator(result.first), iterator(result.second)); } std::pair equal_range( const key_type& key ) const { auto result = const_cast(this)->internal_equal_range(key); return std::make_pair(const_iterator(result.first), const_iterator(result.second)); } template typename std::enable_if::value, std::pair>::type equal_range( const K& key ) { auto result = internal_equal_range(key); return std::make_pair(iterator(result.first), iterator(result.second)); } template typename std::enable_if::value, std::pair>::type equal_range( const K& key ) const { auto result = const_cast(this)->internal_equal_range(key); return std::make_pair(iterator(result.first), iterator(result.second)); } size_type count( const key_type& key ) const { return internal_count(key); } template typename std::enable_if::value, size_type>::type count( const K& key ) const { return internal_count(key); } bool 
contains( const key_type& key ) const { return find(key) != end(); } template typename std::enable_if::value, bool>::type contains( const K& key ) const { return find(key) != end(); } // Bucket interface local_iterator unsafe_begin( size_type n ) { return local_iterator(first_value_node(get_bucket(n))); } const_local_iterator unsafe_begin( size_type n ) const { auto bucket_begin = first_value_node(const_cast(this)->get_bucket(n)); return const_local_iterator(bucket_begin); } const_local_iterator unsafe_cbegin( size_type n ) const { auto bucket_begin = first_value_node(const_cast(this)->get_bucket(n)); return const_local_iterator(bucket_begin); } local_iterator unsafe_end( size_type n ) { size_type bucket_count = my_bucket_count.load(std::memory_order_relaxed); return n != bucket_count - 1 ? unsafe_begin(get_next_bucket_index(n)) : local_iterator(nullptr); } const_local_iterator unsafe_end( size_type n ) const { size_type bucket_count = my_bucket_count.load(std::memory_order_relaxed); return n != bucket_count - 1 ? unsafe_begin(get_next_bucket_index(n)) : const_local_iterator(nullptr); } const_local_iterator unsafe_cend( size_type n ) const { size_type bucket_count = my_bucket_count.load(std::memory_order_relaxed); return n != bucket_count - 1 ? 
unsafe_begin(get_next_bucket_index(n)) : const_local_iterator(nullptr); } size_type unsafe_bucket_count() const { return my_bucket_count.load(std::memory_order_relaxed); } size_type unsafe_max_bucket_count() const { return max_size(); } size_type unsafe_bucket_size( size_type n ) const { return size_type(std::distance(unsafe_begin(n), unsafe_end(n))); } size_type unsafe_bucket( const key_type& key ) const { return my_hash_compare(key) % my_bucket_count.load(std::memory_order_relaxed); } // Hash policy float load_factor() const { return float(size() / float(my_bucket_count.load(std::memory_order_acquire))); } float max_load_factor() const { return my_max_load_factor; } void max_load_factor( float mlf ) { if (mlf != mlf || mlf < 0) { tbb::detail::throw_exception(exception_id::invalid_load_factor); } my_max_load_factor = mlf; } // TODO: unsafe? void rehash( size_type bucket_count ) { size_type current_bucket_count = my_bucket_count.load(std::memory_order_acquire); if (current_bucket_count < bucket_count) { // TODO: do we need do-while here? 
my_bucket_count.compare_exchange_strong(current_bucket_count, round_up_to_power_of_two(bucket_count)); } } void reserve( size_type elements_count ) { size_type current_bucket_count = my_bucket_count.load(std::memory_order_acquire); size_type necessary_bucket_count = current_bucket_count; do { // TODO: Log2 seems useful here while (necessary_bucket_count * max_load_factor() < elements_count) { necessary_bucket_count <<= 1; } } while (current_bucket_count >= necessary_bucket_count || !my_bucket_count.compare_exchange_strong(current_bucket_count, necessary_bucket_count)); } // Observers hasher hash_function() const { return my_hash_compare.hash_function(); } key_equal key_eq() const { return my_hash_compare.key_eq(); } class const_range_type { private: const concurrent_unordered_base& my_instance; node_ptr my_begin_node; // may be node* const node_ptr my_end_node; mutable node_ptr my_midpoint_node; public: using size_type = typename concurrent_unordered_base::size_type; using value_type = typename concurrent_unordered_base::value_type; using reference = typename concurrent_unordered_base::reference; using difference_type = typename concurrent_unordered_base::difference_type; using iterator = typename concurrent_unordered_base::const_iterator; bool empty() const { return my_begin_node == my_end_node; } bool is_divisible() const { return my_midpoint_node != my_end_node; } size_type grainsize() const { return 1; } const_range_type( const_range_type& range, split ) : my_instance(range.my_instance), my_begin_node(range.my_midpoint_node), my_end_node(range.my_end_node) { range.my_end_node = my_begin_node; __TBB_ASSERT(!empty(), "Splitting despite the range is not divisible"); __TBB_ASSERT(!range.empty(), "Splitting despite the range is not divisible"); set_midpoint(); range.set_midpoint(); } iterator begin() const { return iterator(my_instance.first_value_node(my_begin_node)); } iterator end() const { return iterator(my_instance.first_value_node(my_end_node)); } 
const_range_type( const concurrent_unordered_base& table ) : my_instance(table), my_begin_node(my_instance.first_value_node(const_cast(&table.my_head))), my_end_node(nullptr) { set_midpoint(); } private: void set_midpoint() const { if (empty()) { my_midpoint_node = my_end_node; } else { sokey_type invalid_key = ~sokey_type(0); sokey_type begin_key = my_begin_node != nullptr ? my_begin_node->order_key() : invalid_key; sokey_type end_key = my_end_node != nullptr ? my_end_node->order_key() : invalid_key; size_type mid_bucket = reverse_bits(begin_key + (end_key - begin_key) / 2) % my_instance.my_bucket_count.load(std::memory_order_relaxed); while( my_instance.my_segments[mid_bucket].load(std::memory_order_relaxed) == nullptr) { mid_bucket = my_instance.get_parent(mid_bucket); } if (reverse_bits(mid_bucket) > begin_key) { // Found a dummy node between begin and end my_midpoint_node = my_instance.first_value_node( my_instance.my_segments[mid_bucket].load(std::memory_order_relaxed)); } else { // Didn't find a dummy node between begin and end my_midpoint_node = my_end_node; } } } }; // class const_range_type class range_type : public const_range_type { public: using iterator = typename concurrent_unordered_base::iterator; using const_range_type::const_range_type; iterator begin() const { return iterator(const_range_type::begin().get_node_ptr()); } iterator end() const { return iterator(const_range_type::end().get_node_ptr()); } }; // class range_type // Parallel iteration range_type range() { return range_type(*this); } const_range_type range() const { return const_range_type(*this); } protected: static constexpr bool allow_multimapping = traits_type::allow_multimapping; private: static constexpr size_type initial_bucket_count = 8; static constexpr float initial_max_load_factor = 4; // TODO: consider 1? 
static constexpr size_type pointers_per_embedded_table = sizeof(size_type) * 8 - 1; class unordered_segment_table : public segment_table, allocator_type, unordered_segment_table, pointers_per_embedded_table> { using self_type = unordered_segment_table; using atomic_node_ptr = std::atomic; using base_type = segment_table, allocator_type, unordered_segment_table, pointers_per_embedded_table>; using segment_type = typename base_type::segment_type; using base_allocator_type = typename base_type::allocator_type; using segment_allocator_type = typename allocator_traits_type::template rebind_alloc; using segment_allocator_traits = tbb::detail::allocator_traits; public: // Segment table for unordered containers should not be extended in the wait- free implementation static constexpr bool allow_table_extending = false; static constexpr bool is_noexcept_assignment = std::is_nothrow_move_assignable::value && std::is_nothrow_move_assignable::value && segment_allocator_traits::is_always_equal::value; static constexpr bool is_noexcept_swap = tbb::detail::is_nothrow_swappable::value && tbb::detail::is_nothrow_swappable::value && segment_allocator_traits::is_always_equal::value; // TODO: using base_type::base_type is not compiling on Windows and Intel Compiler - investigate unordered_segment_table( const base_allocator_type& alloc = base_allocator_type() ) : base_type(alloc) {} unordered_segment_table( const unordered_segment_table& ) = default; unordered_segment_table( const unordered_segment_table& other, const base_allocator_type& alloc ) : base_type(other, alloc) {} unordered_segment_table( unordered_segment_table&& ) = default; unordered_segment_table( unordered_segment_table&& other, const base_allocator_type& alloc ) : base_type(std::move(other), alloc) {} unordered_segment_table& operator=( const unordered_segment_table& ) = default; unordered_segment_table& operator=( unordered_segment_table&& ) = default; segment_type create_segment( typename 
base_type::segment_table_type, typename base_type::segment_index_type segment_index, size_type ) { segment_allocator_type alloc(this->get_allocator()); size_type seg_size = this->segment_size(segment_index); segment_type new_segment = segment_allocator_traits::allocate(alloc, seg_size); for (size_type i = 0; i != seg_size; ++i) { segment_allocator_traits::construct(alloc, new_segment + i, nullptr); } return new_segment; } segment_type nullify_segment( typename base_type::segment_table_type table, size_type segment_index ) { segment_type target_segment = table[segment_index].load(std::memory_order_relaxed); table[segment_index].store(nullptr, std::memory_order_relaxed); return target_segment; } // deallocate_segment is required by the segment_table base class, but // in unordered, it is also necessary to call the destructor during deallocation void deallocate_segment( segment_type address, size_type index ) { destroy_segment(address, index); } void destroy_segment( segment_type address, size_type index ) { segment_allocator_type alloc(this->get_allocator()); for (size_type i = 0; i != this->segment_size(index); ++i) { segment_allocator_traits::destroy(alloc, address + i); } segment_allocator_traits::deallocate(alloc, address, this->segment_size(index)); } void copy_segment( size_type index, segment_type, segment_type to ) { if (index == 0) { // The first element in the first segment is embedded into the table (my_head) // so the first pointer should not be stored here // It would be stored during move ctor/assignment operation to[1].store(nullptr, std::memory_order_relaxed); } else { for (size_type i = 0; i != this->segment_size(index); ++i) { to[i].store(nullptr, std::memory_order_relaxed); } } } void move_segment( size_type index, segment_type from, segment_type to ) { if (index == 0) { // The first element in the first segment is embedded into the table (my_head) // so the first pointer should not be stored here // It would be stored during move ctor/assignment 
operation to[1].store(from[1].load(std::memory_order_relaxed), std::memory_order_relaxed); } else { for (size_type i = 0; i != this->segment_size(index); ++i) { to[i].store(from[i].load(std::memory_order_relaxed), std::memory_order_relaxed); from[i].store(nullptr, std::memory_order_relaxed); } } } // allocate_long_table is required by the segment_table base class, but unused for unordered containers typename base_type::segment_table_type allocate_long_table( const typename base_type::atomic_segment*, size_type ) { __TBB_ASSERT(false, "This method should never been called"); // TableType is a pointer return nullptr; } // destroy_elements is required by the segment_table base class, but unused for unordered containers // this function call but do nothing void destroy_elements() {} }; // struct unordered_segment_table void internal_clear() { // TODO: consider usefulness of two versions of clear() - with dummy nodes deallocation and without it node_ptr next = my_head.next(); node_ptr curr = next; my_head.set_next(nullptr); while (curr != nullptr) { next = curr->next(); destroy_node(curr); curr = next; } my_size.store(0, std::memory_order_relaxed); my_segments.clear(); } void destroy_node( node_ptr node ) { if (node->is_dummy()) { node_allocator_type dummy_node_allocator(my_segments.get_allocator()); // Destroy the node node_allocator_traits::destroy(dummy_node_allocator, node); // Deallocate the memory node_allocator_traits::deallocate(dummy_node_allocator, node, 1); } else { // GCC 11.1 issues a warning here that incorrect destructor might be called for dummy_nodes #if (__TBB_GCC_VERSION >= 110100 && __TBB_GCC_VERSION < 120000 ) && !__clang__ && !__INTEL_COMPILER volatile #endif value_node_ptr val_node = static_cast(node); value_node_allocator_type value_node_allocator(my_segments.get_allocator()); // Destroy the value value_node_allocator_traits::destroy(value_node_allocator, val_node->storage()); // Destroy the node 
value_node_allocator_traits::destroy(value_node_allocator, val_node); // Deallocate the memory value_node_allocator_traits::deallocate(value_node_allocator, val_node, 1); } } struct internal_insert_return_type { // If the insertion failed - the remaining_node points to the node, which was failed to insert // This node can be allocated in process of insertion value_node_ptr remaining_node; // If the insertion failed - node_with_equal_key points to the node in the list with the // key, equivalent to the inserted, otherwise it points to the node, which was inserted. value_node_ptr node_with_equal_key; // Insertion status // NOTE: if it is true - remaining_node should be nullptr bool inserted; }; // struct internal_insert_return_type // Inserts the value into the split ordered list template std::pair internal_insert_value( ValueType&& value ) { auto create_value_node = [&value, this]( sokey_type order_key )->value_node_ptr { return create_node(order_key, std::forward(value)); }; auto insert_result = internal_insert(value, create_value_node); if (insert_result.remaining_node != nullptr) { // If the insertion fails - destroy the node which was failed to insert if it exist __TBB_ASSERT(!insert_result.inserted, "remaining_node should be nullptr if the node was successfully inserted"); destroy_node(insert_result.remaining_node); } return { iterator(insert_result.node_with_equal_key), insert_result.inserted }; } // Inserts the node into the split ordered list // Creates a node using the specified callback after the place for insertion was found // Returns internal_insert_return_type object, where: // - If the insertion succeeded: // - remaining_node is nullptr // - node_with_equal_key point to the inserted node // - inserted is true // - If the insertion failed: // - remaining_node points to the node, that was failed to insert if it was created. 
// nullptr if the node was not created, because the requested key was already // presented in the list // - node_with_equal_key point to the element in the list with the key, equivalent to // to the requested key // - inserted is false template internal_insert_return_type internal_insert( ValueType&& value, CreateInsertNode create_insert_node ) { static_assert(std::is_same::type, value_type>::value, "Incorrect type in internal_insert"); const key_type& key = traits_type::get_key(value); sokey_type hash_key = sokey_type(my_hash_compare(key)); sokey_type order_key = split_order_key_regular(hash_key); node_ptr prev = prepare_bucket(hash_key); __TBB_ASSERT(prev != nullptr, "Invalid head node"); auto search_result = search_after(prev, order_key, key); if (search_result.second) { return internal_insert_return_type{ nullptr, search_result.first, false }; } value_node_ptr new_node = create_insert_node(order_key); node_ptr curr = search_result.first; while (!try_insert(prev, new_node, curr)) { search_result = search_after(prev, order_key, key); if (search_result.second) { return internal_insert_return_type{ new_node, search_result.first, false }; } curr = search_result.first; } auto sz = my_size.fetch_add(1); adjust_table_size(sz + 1, my_bucket_count.load(std::memory_order_acquire)); return internal_insert_return_type{ nullptr, static_cast(new_node), true }; } // Searches the node with the key, equivalent to key with requested order key after the node prev // Returns the existing node and true if the node is already in the list // Returns the first node with the order key, greater than requested and false if the node is not presented in the list std::pair search_after( node_ptr& prev, sokey_type order_key, const key_type& key ) { // NOTE: static_cast(curr) should be done only after we would ensure // that the node is not a dummy node node_ptr curr = prev->next(); while (curr != nullptr && (curr->order_key() < order_key || (curr->order_key() == order_key && 
!my_hash_compare(traits_type::get_key(static_cast(curr)->value()), key)))) { prev = curr; curr = curr->next(); } if (curr != nullptr && curr->order_key() == order_key && !allow_multimapping) { return { static_cast(curr), true }; } return { static_cast(curr), false }; } void adjust_table_size( size_type total_elements, size_type current_size ) { // Grow the table by a factor of 2 if possible and needed if ( (float(total_elements) / float(current_size)) > my_max_load_factor ) { // Double the size of the hash only if size hash not changed in between loads my_bucket_count.compare_exchange_strong(current_size, 2u * current_size); } } node_ptr insert_dummy_node( node_ptr parent_dummy_node, sokey_type order_key ) { node_ptr prev_node = parent_dummy_node; node_ptr dummy_node = create_dummy_node(order_key); node_ptr next_node; do { next_node = prev_node->next(); // Move forward through the list while the order key is less than requested while (next_node != nullptr && next_node->order_key() < order_key) { prev_node = next_node; next_node = next_node->next(); } if (next_node != nullptr && next_node->order_key() == order_key) { // Another dummy node with the same order key was inserted by another thread // Destroy the node and exit destroy_node(dummy_node); return next_node; } } while (!try_insert(prev_node, dummy_node, next_node)); return dummy_node; } // Try to insert a node between prev_node and expected next // If the next is not equal to expected next - return false static bool try_insert( node_ptr prev_node, node_ptr new_node, node_ptr current_next_node ) { new_node->set_next(current_next_node); return prev_node->try_set_next(current_next_node, new_node); } // Returns the bucket, associated with the hash_key node_ptr prepare_bucket( sokey_type hash_key ) { size_type bucket = hash_key % my_bucket_count.load(std::memory_order_acquire); return get_bucket(bucket); } // Initialize the corresponding bucket if it is not initialized node_ptr get_bucket( size_type bucket_index ) 
{ if (my_segments[bucket_index].load(std::memory_order_acquire) == nullptr) { init_bucket(bucket_index); } return my_segments[bucket_index].load(std::memory_order_acquire); } void init_bucket( size_type bucket ) { if (bucket == 0) { // Atomicaly store the first bucket into my_head node_ptr disabled = nullptr; my_segments[0].compare_exchange_strong(disabled, &my_head); return; } size_type parent_bucket = get_parent(bucket); while (my_segments[parent_bucket].load(std::memory_order_acquire) == nullptr) { // Initialize all of the parent buckets init_bucket(parent_bucket); } __TBB_ASSERT(my_segments[parent_bucket].load(std::memory_order_acquire) != nullptr, "Parent bucket should be initialized"); node_ptr parent = my_segments[parent_bucket].load(std::memory_order_acquire); // Insert dummy node into the list node_ptr dummy_node = insert_dummy_node(parent, split_order_key_dummy(bucket)); // TODO: consider returning pair to avoid store operation if the bucket was stored by an other thread // or move store to insert_dummy_node // Add dummy_node into the segment table my_segments[bucket].store(dummy_node, std::memory_order_release); } node_ptr create_dummy_node( sokey_type order_key ) { node_allocator_type dummy_node_allocator(my_segments.get_allocator()); node_ptr dummy_node = node_allocator_traits::allocate(dummy_node_allocator, 1); node_allocator_traits::construct(dummy_node_allocator, dummy_node, order_key); return dummy_node; } template value_node_ptr create_node( sokey_type order_key, Args&&... 
args ) { value_node_allocator_type value_node_allocator(my_segments.get_allocator()); // Allocate memory for the value_node value_node_ptr new_node = value_node_allocator_traits::allocate(value_node_allocator, 1); // Construct the node value_node_allocator_traits::construct(value_node_allocator, new_node, order_key); // try_call API is not convenient here due to broken // variadic capture on GCC 4.8.5 auto value_guard = make_raii_guard([&] { value_node_allocator_traits::destroy(value_node_allocator, new_node); value_node_allocator_traits::deallocate(value_node_allocator, new_node, 1); }); // Construct the value in the node value_node_allocator_traits::construct(value_node_allocator, new_node->storage(), std::forward(args)...); value_guard.dismiss(); return new_node; } value_node_ptr first_value_node( node_ptr first_node ) const { while (first_node != nullptr && first_node->is_dummy()) { first_node = first_node->next(); } return static_cast(first_node); } // Unsafe method, which removes the node from the list and returns the next node node_ptr internal_erase( value_node_ptr node_to_erase ) { __TBB_ASSERT(node_to_erase != nullptr, "Invalid iterator for erase"); node_ptr next_node = node_to_erase->next(); internal_extract(node_to_erase); destroy_node(node_to_erase); return next_node; } template size_type internal_erase_by_key( const K& key ) { // TODO: consider reimplementation without equal_range - it is not effective to perform lookup over a bucket // for each unsafe_erase call auto eq_range = equal_range(key); size_type erased_count = 0; for (auto it = eq_range.first; it != eq_range.second;) { it = unsafe_erase(it); ++erased_count; } return erased_count; } // Unsafe method, which extracts the node from the list void internal_extract( value_node_ptr node_to_extract ) { const key_type& key = traits_type::get_key(node_to_extract->value()); sokey_type hash_key = sokey_type(my_hash_compare(key)); node_ptr prev_node = prepare_bucket(hash_key); for (node_ptr node = 
prev_node->next(); node != nullptr; prev_node = node, node = node->next()) { if (node == node_to_extract) { unlink_node(prev_node, node, node_to_extract->next()); my_size.store(my_size.load(std::memory_order_relaxed) - 1, std::memory_order_relaxed); return; } __TBB_ASSERT(node->order_key() <= node_to_extract->order_key(), "node, which is going to be extracted should be presented in the list"); } } protected: template void internal_merge( SourceType&& source ) { static_assert(std::is_same::type::node_type>::value, "Incompatible containers cannot be merged"); for (node_ptr source_prev = &source.my_head; source_prev->next() != nullptr;) { if (!source_prev->next()->is_dummy()) { value_node_ptr curr = static_cast(source_prev->next()); // If the multimapping is allowed, or the key is not presented // in the *this container - extract the node from the list if (allow_multimapping || !contains(traits_type::get_key(curr->value()))) { node_ptr next_node = curr->next(); source.unlink_node(source_prev, curr, next_node); // Remember the old order key sokey_type old_order_key = curr->order_key(); // Node handle with curr cannot be used directly in insert call, because // the destructor of node_type will destroy curr node_type curr_node = node_handle_accessor::construct(curr); // If the insertion fails - return ownership of the node to the source if (!insert(std::move(curr_node)).second) { __TBB_ASSERT(!allow_multimapping, "Insertion should succeed for multicontainer"); __TBB_ASSERT(source_prev->next() == next_node, "Concurrent operations with the source container in merge are prohibited"); // Initialize the node with the old order key, because the order key // can change during the insertion curr->init(old_order_key); __TBB_ASSERT(old_order_key >= source_prev->order_key() && (next_node == nullptr || old_order_key <= next_node->order_key()), "Wrong nodes order in the source container"); // Merge is unsafe for source container, so the insertion back can be done without 
compare_exchange curr->set_next(next_node); source_prev->set_next(curr); source_prev = curr; node_handle_accessor::deactivate(curr_node); } else { source.my_size.fetch_sub(1, std::memory_order_relaxed); } } else { source_prev = curr; } } else { source_prev = source_prev->next(); } } } private: // Unsafe method, which unlinks the node between prev and next void unlink_node( node_ptr prev_node, node_ptr node_to_unlink, node_ptr next_node ) { __TBB_ASSERT(prev_node->next() == node_to_unlink && node_to_unlink->next() == next_node, "erasing and extracting nodes from the containers are unsafe in concurrent mode"); prev_node->set_next(next_node); node_to_unlink->set_next(nullptr); } template value_node_ptr internal_find( const K& key ) { sokey_type hash_key = sokey_type(my_hash_compare(key)); sokey_type order_key = split_order_key_regular(hash_key); node_ptr curr = prepare_bucket(hash_key); while (curr != nullptr) { if (curr->order_key() > order_key) { // If the order key is greater than the requested order key, // the element is not in the hash table return nullptr; } else if (curr->order_key() == order_key && my_hash_compare(traits_type::get_key(static_cast(curr)->value()), key)) { // The fact that order keys match does not mean that the element is found. // Key function comparison has to be performed to check whether this is the // right element. If not, keep searching while order key is the same. 
return static_cast(curr); } curr = curr->next(); } return nullptr; } template std::pair internal_equal_range( const K& key ) { sokey_type hash_key = sokey_type(my_hash_compare(key)); sokey_type order_key = split_order_key_regular(hash_key); node_ptr curr = prepare_bucket(hash_key); while (curr != nullptr) { if (curr->order_key() > order_key) { // If the order key is greater than the requested order key, // the element is not in the hash table return std::make_pair(nullptr, nullptr); } else if (curr->order_key() == order_key && my_hash_compare(traits_type::get_key(static_cast(curr)->value()), key)) { value_node_ptr first = static_cast(curr); node_ptr last = first; do { last = last->next(); } while (allow_multimapping && last != nullptr && !last->is_dummy() && my_hash_compare(traits_type::get_key(static_cast(last)->value()), key)); return std::make_pair(first, first_value_node(last)); } curr = curr->next(); } return {nullptr, nullptr}; } template size_type internal_count( const K& key ) const { if (allow_multimapping) { // TODO: consider reimplementing the internal_equal_range with elements counting to avoid std::distance auto eq_range = equal_range(key); return std::distance(eq_range.first, eq_range.second); } else { return contains(key) ? 
1 : 0; } } void internal_copy( const concurrent_unordered_base& other ) { node_ptr last_node = &my_head; my_segments[0].store(&my_head, std::memory_order_relaxed); for (node_ptr node = other.my_head.next(); node != nullptr; node = node->next()) { node_ptr new_node; if (!node->is_dummy()) { // The node in the right table contains a value new_node = create_node(node->order_key(), static_cast(node)->value()); } else { // The node in the right table is a dummy node new_node = create_dummy_node(node->order_key()); my_segments[reverse_bits(node->order_key())].store(new_node, std::memory_order_relaxed); } last_node->set_next(new_node); last_node = new_node; } } void internal_move( concurrent_unordered_base&& other ) { node_ptr last_node = &my_head; my_segments[0].store(&my_head, std::memory_order_relaxed); for (node_ptr node = other.my_head.next(); node != nullptr; node = node->next()) { node_ptr new_node; if (!node->is_dummy()) { // The node in the right table contains a value new_node = create_node(node->order_key(), std::move(static_cast(node)->value())); } else { // TODO: do we need to destroy a dummy node in the right container? 
// The node in the right table is a dummy_node new_node = create_dummy_node(node->order_key()); my_segments[reverse_bits(node->order_key())].store(new_node, std::memory_order_relaxed); } last_node->set_next(new_node); last_node = new_node; } } void move_content( concurrent_unordered_base&& other ) { // NOTE: allocators should be equal my_head.set_next(other.my_head.next()); other.my_head.set_next(nullptr); my_segments[0].store(&my_head, std::memory_order_relaxed); other.my_bucket_count.store(initial_bucket_count, std::memory_order_relaxed); other.my_max_load_factor = initial_max_load_factor; other.my_size.store(0, std::memory_order_relaxed); } void internal_move_construct_with_allocator( concurrent_unordered_base&& other, const allocator_type&, /*is_always_equal = */std::true_type ) { // Allocators are always equal - no need to compare for equality move_content(std::move(other)); } void internal_move_construct_with_allocator( concurrent_unordered_base&& other, const allocator_type& alloc, /*is_always_equal = */std::false_type ) { // Allocators are not always equal if (alloc == other.my_segments.get_allocator()) { move_content(std::move(other)); } else { try_call( [&] { internal_move(std::move(other)); } ).on_exception( [&] { clear(); }); } } // Move assigns the hash table to other is any instances of allocator_type are always equal // or propagate_on_container_move_assignment is true void internal_move_assign( concurrent_unordered_base&& other, /*is_always_equal || POCMA = */std::true_type ) { move_content(std::move(other)); } // Move assigns the hash table to other is any instances of allocator_type are not always equal // and propagate_on_container_move_assignment is false void internal_move_assign( concurrent_unordered_base&& other, /*is_always_equal || POCMA = */std::false_type ) { if (my_segments.get_allocator() == other.my_segments.get_allocator()) { move_content(std::move(other)); } else { // TODO: guards for exceptions internal_move(std::move(other)); } } 
void internal_swap( concurrent_unordered_base& other, /*is_always_equal || POCS = */std::true_type ) { internal_swap_fields(other); } void internal_swap( concurrent_unordered_base& other, /*is_always_equal || POCS = */std::false_type ) { __TBB_ASSERT(my_segments.get_allocator() == other.my_segments.get_allocator(), "Swapping with unequal allocators is not allowed"); internal_swap_fields(other); } void internal_swap_fields( concurrent_unordered_base& other ) { node_ptr first_node = my_head.next(); my_head.set_next(other.my_head.next()); other.my_head.set_next(first_node); size_type current_size = my_size.load(std::memory_order_relaxed); my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); other.my_size.store(current_size, std::memory_order_relaxed); size_type bucket_count = my_bucket_count.load(std::memory_order_relaxed); my_bucket_count.store(other.my_bucket_count.load(std::memory_order_relaxed), std::memory_order_relaxed); other.my_bucket_count.store(bucket_count, std::memory_order_relaxed); using std::swap; swap(my_max_load_factor, other.my_max_load_factor); swap(my_hash_compare, other.my_hash_compare); my_segments.swap(other.my_segments); // swap() method from segment table swaps all of the segments including the first segment // We should restore it to my_head. Without it the first segment of the container will point // to other.my_head. 
my_segments[0].store(&my_head, std::memory_order_relaxed); other.my_segments[0].store(&other.my_head, std::memory_order_relaxed); } // A regular order key has its original hash value reversed and the last bit set static constexpr sokey_type split_order_key_regular( sokey_type hash ) { return reverse_bits(hash) | 0x1; } // A dummy order key has its original hash value reversed and the last bit unset static constexpr sokey_type split_order_key_dummy( sokey_type hash ) { return reverse_bits(hash) & ~sokey_type(0x1); } size_type get_parent( size_type bucket ) const { // Unset bucket's most significant turned-on bit __TBB_ASSERT(bucket != 0, "Unable to get_parent of the bucket 0"); size_type msb = tbb::detail::log2(bucket); return bucket & ~(size_type(1) << msb); } size_type get_next_bucket_index( size_type bucket ) const { size_type bits = tbb::detail::log2(my_bucket_count.load(std::memory_order_relaxed)); size_type reversed_next = reverse_n_bits(bucket, bits) + 1; return reverse_n_bits(reversed_next, bits); } std::atomic my_size; std::atomic my_bucket_count; float my_max_load_factor; hash_compare_type my_hash_compare; list_node_type my_head; // Head node for split ordered list unordered_segment_table my_segments; // Segment table of pointers to nodes template friend class solist_iterator; template friend class concurrent_unordered_base; }; // class concurrent_unordered_base template bool operator==( const concurrent_unordered_base& lhs, const concurrent_unordered_base& rhs ) { if (&lhs == &rhs) { return true; } if (lhs.size() != rhs.size()) { return false; } #if _MSC_VER // Passing "unchecked" iterators to std::permutation with 3 parameters // causes compiler warnings. 
// The workaround is to use overload with 4 parameters, which is // available since C++14 - minimally supported version on MSVC return std::is_permutation(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); #else return std::is_permutation(lhs.begin(), lhs.end(), rhs.begin()); #endif } #if !__TBB_CPP20_COMPARISONS_PRESENT template bool operator!=( const concurrent_unordered_base& lhs, const concurrent_unordered_base& rhs ) { return !(lhs == rhs); } #endif #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) #pragma warning(pop) // warning 4127 is back #endif } // namespace d1 } // namespace detail } // namespace tbb #endif // __TBB_detail__concurrent_unordered_base_H level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/_config.h000066400000000000000000000473471514453371700316360ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_detail__config_H #define __TBB_detail__config_H /** This header is supposed to contain macro definitions only. The macros defined here are intended to control such aspects of TBB build as - presence of compiler features - compilation modes - feature sets - known compiler/platform issues **/ /* Check which standard library we use. 
*/ #include #include "_export.h" #if _MSC_VER #define __TBB_EXPORTED_FUNC __cdecl #define __TBB_EXPORTED_METHOD __thiscall #else #define __TBB_EXPORTED_FUNC #define __TBB_EXPORTED_METHOD #endif #if defined(_MSVC_LANG) #define __TBB_LANG _MSVC_LANG #else #define __TBB_LANG __cplusplus #endif // _MSVC_LANG #define __TBB_CPP14_PRESENT (__TBB_LANG >= 201402L) #define __TBB_CPP17_PRESENT (__TBB_LANG >= 201703L) #define __TBB_CPP20_PRESENT (__TBB_LANG >= 202002L) #if __INTEL_COMPILER || _MSC_VER #define __TBB_NOINLINE(decl) __declspec(noinline) decl #elif __GNUC__ #define __TBB_NOINLINE(decl) decl __attribute__ ((noinline)) #else #define __TBB_NOINLINE(decl) decl #endif #define __TBB_STRING_AUX(x) #x #define __TBB_STRING(x) __TBB_STRING_AUX(x) // Note that when ICC or Clang is in use, __TBB_GCC_VERSION might not fully match // the actual GCC version on the system. #define __TBB_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) /* Check which standard library we use. */ // Prior to GCC 7, GNU libstdc++ did not have a convenient version macro. // Therefore we use different ways to detect its version. #ifdef TBB_USE_GLIBCXX_VERSION // The version is explicitly specified in our public TBB_USE_GLIBCXX_VERSION macro. // Its format should match the __TBB_GCC_VERSION above, e.g. 70301 for libstdc++ coming with GCC 7.3.1. 
#define __TBB_GLIBCXX_VERSION TBB_USE_GLIBCXX_VERSION #elif _GLIBCXX_RELEASE && _GLIBCXX_RELEASE != __GNUC__ // Reported versions of GCC and libstdc++ do not match; trust the latter #define __TBB_GLIBCXX_VERSION (_GLIBCXX_RELEASE*10000) #elif __GLIBCPP__ || __GLIBCXX__ // The version macro is not defined or matches the GCC version; use __TBB_GCC_VERSION #define __TBB_GLIBCXX_VERSION __TBB_GCC_VERSION #endif #if __clang__ // according to clang documentation, version can be vendor specific #define __TBB_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) #endif /** Macro helpers **/ #define __TBB_CONCAT_AUX(A,B) A##B // The additional level of indirection is needed to expand macros A and B (not to get the AB macro). // See [cpp.subst] and [cpp.concat] for more details. #define __TBB_CONCAT(A,B) __TBB_CONCAT_AUX(A,B) // The IGNORED argument and comma are needed to always have 2 arguments (even when A is empty). #define __TBB_IS_MACRO_EMPTY(A,IGNORED) __TBB_CONCAT_AUX(__TBB_MACRO_EMPTY,A) #define __TBB_MACRO_EMPTY 1 #if _M_X64 || _M_ARM64 #define __TBB_W(name) name##64 #else #define __TBB_W(name) name #endif /** User controlled TBB features & modes **/ #ifndef TBB_USE_DEBUG /* There are four cases that are supported: 1. "_DEBUG is undefined" means "no debug"; 2. "_DEBUG defined to something that is evaluated to 0" (including "garbage", as per [cpp.cond]) means "no debug"; 3. "_DEBUG defined to something that is evaluated to a non-zero value" means "debug"; 4. "_DEBUG defined to nothing (empty)" means "debug". */ #ifdef _DEBUG // Check if _DEBUG is empty. 
#define __TBB_IS__DEBUG_EMPTY (__TBB_IS_MACRO_EMPTY(_DEBUG,IGNORED)==__TBB_MACRO_EMPTY) #if __TBB_IS__DEBUG_EMPTY #define TBB_USE_DEBUG 1 #else #define TBB_USE_DEBUG _DEBUG #endif // __TBB_IS__DEBUG_EMPTY #else #define TBB_USE_DEBUG 0 #endif // _DEBUG #endif // TBB_USE_DEBUG #ifndef TBB_USE_ASSERT #define TBB_USE_ASSERT TBB_USE_DEBUG #endif // TBB_USE_ASSERT #ifndef TBB_USE_PROFILING_TOOLS #if TBB_USE_DEBUG #define TBB_USE_PROFILING_TOOLS 2 #else // TBB_USE_DEBUG #define TBB_USE_PROFILING_TOOLS 0 #endif // TBB_USE_DEBUG #endif // TBB_USE_PROFILING_TOOLS // Exceptions support cases #if !(__EXCEPTIONS || defined(_CPPUNWIND) || __SUNPRO_CC) #if TBB_USE_EXCEPTIONS #error Compilation settings do not support exception handling. Please do not set TBB_USE_EXCEPTIONS macro or set it to 0. #elif !defined(TBB_USE_EXCEPTIONS) #define TBB_USE_EXCEPTIONS 0 #endif #elif !defined(TBB_USE_EXCEPTIONS) #define TBB_USE_EXCEPTIONS 1 #endif /** Preprocessor symbols to determine HW architecture **/ #if _WIN32 || _WIN64 #if defined(_M_X64) || defined(__x86_64__) // the latter for MinGW support #define __TBB_x86_64 1 #elif defined(_M_IA64) #define __TBB_ipf 1 #elif defined(_M_IX86) || defined(__i386__) // the latter for MinGW support #define __TBB_x86_32 1 #else #define __TBB_generic_arch 1 #endif #else /* Assume generic Unix */ #if __x86_64__ #define __TBB_x86_64 1 #elif __ia64__ #define __TBB_ipf 1 #elif __i386__||__i386 // __i386 is for Sun OS #define __TBB_x86_32 1 #else #define __TBB_generic_arch 1 #endif #endif /** Windows API or POSIX API **/ #if _WIN32 || _WIN64 #define __TBB_USE_WINAPI 1 #else #define __TBB_USE_POSIX 1 #endif /** Internal TBB features & modes **/ /** __TBB_DYNAMIC_LOAD_ENABLED describes the system possibility to load shared libraries at run time **/ #ifndef __TBB_DYNAMIC_LOAD_ENABLED #define __TBB_DYNAMIC_LOAD_ENABLED 1 #endif /** __TBB_WIN8UI_SUPPORT enables support of Windows* Store Apps and limit a possibility to load shared libraries at run time only from 
application container **/ #if defined(WINAPI_FAMILY) && WINAPI_FAMILY == WINAPI_FAMILY_APP #define __TBB_WIN8UI_SUPPORT 1 #else #define __TBB_WIN8UI_SUPPORT 0 #endif /** __TBB_WEAK_SYMBOLS_PRESENT denotes that the system supports the weak symbol mechanism **/ #ifndef __TBB_WEAK_SYMBOLS_PRESENT #define __TBB_WEAK_SYMBOLS_PRESENT ( !_WIN32 && !__APPLE__ && !__sun && (__TBB_GCC_VERSION >= 40000 || __INTEL_COMPILER ) ) #endif /** Presence of compiler features **/ #if __clang__ && !__INTEL_COMPILER #define __TBB_USE_OPTIONAL_RTTI __has_feature(cxx_rtti) #elif defined(_CPPRTTI) #define __TBB_USE_OPTIONAL_RTTI 1 #else #define __TBB_USE_OPTIONAL_RTTI (__GXX_RTTI || __RTTI || __INTEL_RTTI__) #endif /** Address sanitizer detection **/ #ifdef __SANITIZE_ADDRESS__ #define __TBB_USE_ADDRESS_SANITIZER 1 #elif defined(__has_feature) #if __has_feature(address_sanitizer) #define __TBB_USE_ADDRESS_SANITIZER 1 #endif #endif /** Library features presence macros **/ #define __TBB_CPP14_INTEGER_SEQUENCE_PRESENT (__TBB_LANG >= 201402L) #define __TBB_CPP17_INVOKE_RESULT_PRESENT (__TBB_LANG >= 201703L) // TODO: Remove the condition(__INTEL_COMPILER > 2021) from the __TBB_CPP17_DEDUCTION_GUIDES_PRESENT // macro when this feature start working correctly on this compiler. 
#if __INTEL_COMPILER && (!_MSC_VER || __INTEL_CXX11_MOVE__) #define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (__TBB_LANG >= 201402L) #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (__INTEL_COMPILER > 2021 && __TBB_LANG >= 201703L) #define __TBB_CPP20_CONCEPTS_PRESENT 0 // TODO: add a mechanism for future addition #elif __clang__ #define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (__has_feature(cxx_variable_templates)) #define __TBB_CPP20_CONCEPTS_PRESENT 0 // TODO: add a mechanism for future addition #ifdef __cpp_deduction_guides #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (__cpp_deduction_guides >= 201611L) #else #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT 0 #endif #elif __GNUC__ #define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (__TBB_LANG >= 201402L && __TBB_GCC_VERSION >= 50000) #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (__cpp_deduction_guides >= 201606L) #define __TBB_CPP20_CONCEPTS_PRESENT (__TBB_LANG >= 201709L && __TBB_GCC_VERSION >= 100201) #elif _MSC_VER #define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (_MSC_FULL_VER >= 190023918 && (!__INTEL_COMPILER || __INTEL_COMPILER >= 1700)) #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (_MSC_VER >= 1914 && __TBB_LANG >= 201703L && (!__INTEL_COMPILER || __INTEL_COMPILER > 2021)) #define __TBB_CPP20_CONCEPTS_PRESENT (_MSC_VER >= 1923 && __TBB_LANG >= 202002L) // TODO: INTEL_COMPILER? 
#else #define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (__TBB_LANG >= 201402L) #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (__TBB_LANG >= 201703L) #define __TBB_CPP20_CONCEPTS_PRESENT (__TBB_LANG >= 202002L) #endif // GCC4.8 on RHEL7 does not support std::get_new_handler #define __TBB_CPP11_GET_NEW_HANDLER_PRESENT (_MSC_VER >= 1900 || __TBB_GLIBCXX_VERSION >= 40900 && __GXX_EXPERIMENTAL_CXX0X__ || _LIBCPP_VERSION) // GCC4.8 on RHEL7 does not support std::is_trivially_copyable #define __TBB_CPP11_TYPE_PROPERTIES_PRESENT (_LIBCPP_VERSION || _MSC_VER >= 1700 || (__TBB_GLIBCXX_VERSION >= 50000 && __GXX_EXPERIMENTAL_CXX0X__)) #define __TBB_CPP17_MEMORY_RESOURCE_PRESENT (_MSC_VER >= 1913 && (__TBB_LANG > 201402L) || \ __TBB_GLIBCXX_VERSION >= 90000 && __TBB_LANG >= 201703L) #define __TBB_CPP17_HW_INTERFERENCE_SIZE_PRESENT (_MSC_VER >= 1911) #define __TBB_CPP17_LOGICAL_OPERATIONS_PRESENT (__TBB_LANG >= 201703L) #define __TBB_CPP17_ALLOCATOR_IS_ALWAYS_EQUAL_PRESENT (__TBB_LANG >= 201703L) #define __TBB_CPP17_IS_SWAPPABLE_PRESENT (__TBB_LANG >= 201703L) #if defined(__cpp_impl_three_way_comparison) && defined(__cpp_lib_three_way_comparison) #define __TBB_CPP20_COMPARISONS_PRESENT ((__cpp_impl_three_way_comparison >= 201907L) && (__cpp_lib_three_way_comparison >= 201907L)) #else #define __TBB_CPP20_COMPARISONS_PRESENT __TBB_CPP20_PRESENT #endif #define __TBB_RESUMABLE_TASKS (!__TBB_WIN8UI_SUPPORT && !__ANDROID__ && !__QNXNTO__) /* This macro marks incomplete code or comments describing ideas which are considered for the future. * See also for plain comment with TODO and FIXME marks for small improvement opportunities. */ #define __TBB_TODO 0 /* Check which standard library we use. */ /* __TBB_SYMBOL is defined only while processing exported symbols list where C++ is not allowed. 
*/ #if !defined(__TBB_SYMBOL) && !__TBB_CONFIG_PREPROC_ONLY #include #endif /** Target OS is either iOS* or iOS* simulator **/ #if __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ #define __TBB_IOS 1 #endif #if __APPLE__ #if __INTEL_COMPILER && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ > 1099 \ && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101000 // ICC does not correctly set the macro if -mmacosx-min-version is not specified #define __TBB_MACOS_TARGET_VERSION (100000 + 10*(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ - 1000)) #else #define __TBB_MACOS_TARGET_VERSION __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ #endif #endif #if defined(__GNUC__) && !defined(__INTEL_COMPILER) #define __TBB_GCC_WARNING_IGNORED_ATTRIBUTES_PRESENT (__TBB_GCC_VERSION >= 60100) #endif #if __GNUC__ && !__INTEL_COMPILER && !__clang__ #define __TBB_GCC_PARAMETER_PACK_IN_LAMBDAS_BROKEN (__TBB_GCC_VERSION <= 40805) #endif #define __TBB_CPP17_FALLTHROUGH_PRESENT (__TBB_LANG >= 201703L) #define __TBB_CPP17_NODISCARD_PRESENT (__TBB_LANG >= 201703L) #define __TBB_FALLTHROUGH_PRESENT (__TBB_GCC_VERSION >= 70000 && !__INTEL_COMPILER) #if __TBB_CPP17_FALLTHROUGH_PRESENT #define __TBB_fallthrough [[fallthrough]] #elif __TBB_FALLTHROUGH_PRESENT #define __TBB_fallthrough __attribute__ ((fallthrough)) #else #define __TBB_fallthrough #endif #if __TBB_CPP17_NODISCARD_PRESENT #define __TBB_nodiscard [[nodiscard]] #elif __clang__ || __GNUC__ #define __TBB_nodiscard __attribute__((warn_unused_result)) #else #define __TBB_nodiscard #endif #define __TBB_CPP17_UNCAUGHT_EXCEPTIONS_PRESENT (_MSC_VER >= 1900 || __GLIBCXX__ && __cpp_lib_uncaught_exceptions \ || _LIBCPP_VERSION >= 3700 && (!__TBB_MACOS_TARGET_VERSION || __TBB_MACOS_TARGET_VERSION >= 101200)) #define __TBB_TSX_INTRINSICS_PRESENT (__RTM__ || __INTEL_COMPILER || (_MSC_VER>=1700 && (__TBB_x86_64 || __TBB_x86_32))) #define __TBB_WAITPKG_INTRINSICS_PRESENT ((__INTEL_COMPILER >= 1900 || __TBB_GCC_VERSION >= 110000 || __TBB_CLANG_VERSION >= 
120000) \ && (_WIN32 || _WIN64 || __unix__ || __APPLE__) && (__TBB_x86_32 || __TBB_x86_64) && !__ANDROID__) /** Internal TBB features & modes **/ /** __TBB_SOURCE_DIRECTLY_INCLUDED is a mode used in whitebox testing when it's necessary to test internal functions not exported from TBB DLLs **/ #if (_WIN32||_WIN64) && (__TBB_SOURCE_DIRECTLY_INCLUDED || TBB_USE_PREVIEW_BINARY) #define __TBB_NO_IMPLICIT_LINKAGE 1 #define __TBBMALLOC_NO_IMPLICIT_LINKAGE 1 #endif #if (__TBB_BUILD || __TBBMALLOC_BUILD || __TBBMALLOCPROXY_BUILD || __TBBBIND_BUILD) && !defined(__TBB_NO_IMPLICIT_LINKAGE) #define __TBB_NO_IMPLICIT_LINKAGE 1 #endif #if _MSC_VER #if !__TBB_NO_IMPLICIT_LINKAGE #ifdef _DEBUG #pragma comment(lib, "tbb12_debug.lib") #else #pragma comment(lib, "tbb12.lib") #endif #endif #endif #ifndef __TBB_SCHEDULER_OBSERVER #define __TBB_SCHEDULER_OBSERVER 1 #endif /* __TBB_SCHEDULER_OBSERVER */ #ifndef __TBB_FP_CONTEXT #define __TBB_FP_CONTEXT 1 #endif /* __TBB_FP_CONTEXT */ #define __TBB_RECYCLE_TO_ENQUEUE __TBB_BUILD // keep non-official #ifndef __TBB_ARENA_OBSERVER #define __TBB_ARENA_OBSERVER __TBB_SCHEDULER_OBSERVER #endif /* __TBB_ARENA_OBSERVER */ #ifndef __TBB_ARENA_BINDING #define __TBB_ARENA_BINDING 1 #endif #if (TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION || __TBB_BUILD) && __TBB_ARENA_BINDING #define __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT 1 #endif #ifndef __TBB_ENQUEUE_ENFORCED_CONCURRENCY #define __TBB_ENQUEUE_ENFORCED_CONCURRENCY 1 #endif #if !defined(__TBB_SURVIVE_THREAD_SWITCH) && \ (_WIN32 || _WIN64 || __APPLE__ || (__unix__ && !__ANDROID__)) #define __TBB_SURVIVE_THREAD_SWITCH 1 #endif /* __TBB_SURVIVE_THREAD_SWITCH */ #ifndef TBB_PREVIEW_FLOW_GRAPH_FEATURES #define TBB_PREVIEW_FLOW_GRAPH_FEATURES __TBB_CPF_BUILD #endif #ifndef __TBB_DEFAULT_PARTITIONER #define __TBB_DEFAULT_PARTITIONER tbb::auto_partitioner #endif #ifndef __TBB_FLOW_TRACE_CODEPTR #define __TBB_FLOW_TRACE_CODEPTR __TBB_CPF_BUILD #endif // Intel(R) C++ Compiler starts analyzing 
usages of the deprecated content at the template // instantiation site, which is too late for suppression of the corresponding messages for internal // stuff. #if !defined(__INTEL_COMPILER) && (!defined(TBB_SUPPRESS_DEPRECATED_MESSAGES) || (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0)) #if (__TBB_LANG >= 201402L && (!defined(_MSC_VER) || _MSC_VER >= 1920)) #define __TBB_DEPRECATED [[deprecated]] #define __TBB_DEPRECATED_MSG(msg) [[deprecated(msg)]] #elif _MSC_VER #define __TBB_DEPRECATED __declspec(deprecated) #define __TBB_DEPRECATED_MSG(msg) __declspec(deprecated(msg)) #elif (__GNUC__ && __TBB_GCC_VERSION >= 40805) || __clang__ #define __TBB_DEPRECATED __attribute__((deprecated)) #define __TBB_DEPRECATED_MSG(msg) __attribute__((deprecated(msg))) #endif #endif // !defined(TBB_SUPPRESS_DEPRECATED_MESSAGES) || (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0) #if !defined(__TBB_DEPRECATED) #define __TBB_DEPRECATED #define __TBB_DEPRECATED_MSG(msg) #elif !defined(__TBB_SUPPRESS_INTERNAL_DEPRECATED_MESSAGES) // Suppress deprecated messages from self #define __TBB_SUPPRESS_INTERNAL_DEPRECATED_MESSAGES 1 #endif #if defined(TBB_SUPPRESS_DEPRECATED_MESSAGES) && (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0) #define __TBB_DEPRECATED_VERBOSE __TBB_DEPRECATED #define __TBB_DEPRECATED_VERBOSE_MSG(msg) __TBB_DEPRECATED_MSG(msg) #else #define __TBB_DEPRECATED_VERBOSE #define __TBB_DEPRECATED_VERBOSE_MSG(msg) #endif // (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0) #if (!defined(TBB_SUPPRESS_DEPRECATED_MESSAGES) || (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0)) && !(__TBB_LANG >= 201103L || _MSC_VER >= 1900) #pragma message("TBB Warning: Support for C++98/03 is deprecated. 
Please use the compiler that supports C++11 features at least.") #endif #ifdef _VARIADIC_MAX #define __TBB_VARIADIC_MAX _VARIADIC_MAX #else #if _MSC_VER == 1700 #define __TBB_VARIADIC_MAX 5 // VS11 setting, issue resolved in VS12 #elif _MSC_VER == 1600 #define __TBB_VARIADIC_MAX 10 // VS10 setting #else #define __TBB_VARIADIC_MAX 15 #endif #endif #if __SANITIZE_THREAD__ #define __TBB_USE_THREAD_SANITIZER 1 #elif defined(__has_feature) #if __has_feature(thread_sanitizer) #define __TBB_USE_THREAD_SANITIZER 1 #endif #endif #ifndef __TBB_USE_SANITIZERS #define __TBB_USE_SANITIZERS (__TBB_USE_THREAD_SANITIZER || __TBB_USE_ADDRESS_SANITIZER) #endif #ifndef __TBB_RESUMABLE_TASKS_USE_THREADS #define __TBB_RESUMABLE_TASKS_USE_THREADS __TBB_USE_SANITIZERS #endif #ifndef __TBB_USE_CONSTRAINTS #define __TBB_USE_CONSTRAINTS 1 #endif #ifndef __TBB_STRICT_CONSTRAINTS #define __TBB_STRICT_CONSTRAINTS 1 #endif #if __TBB_CPP20_CONCEPTS_PRESENT && __TBB_USE_CONSTRAINTS #define __TBB_requires(...) requires __VA_ARGS__ #else // __TBB_CPP20_CONCEPTS_PRESENT #define __TBB_requires(...) #endif // __TBB_CPP20_CONCEPTS_PRESENT /** Macros of the form __TBB_XXX_BROKEN denote known issues that are caused by the bugs in compilers, standard or OS specific libraries. They should be removed as soon as the corresponding bugs are fixed or the buggy OS/compiler versions go out of the support list. **/ // Some STL containers not support allocator traits in old GCC versions #if __GXX_EXPERIMENTAL_CXX0X__ && __TBB_GLIBCXX_VERSION <= 50301 #define TBB_ALLOCATOR_TRAITS_BROKEN 1 #endif // GCC 4.8 C++ standard library implements std::this_thread::yield as no-op. 
#if __TBB_GLIBCXX_VERSION >= 40800 && __TBB_GLIBCXX_VERSION < 40900 #define __TBB_GLIBCXX_THIS_THREAD_YIELD_BROKEN 1 #endif /** End of __TBB_XXX_BROKEN macro section **/ #if defined(_MSC_VER) && _MSC_VER>=1500 && !defined(__INTEL_COMPILER) // A macro to suppress erroneous or benign "unreachable code" MSVC warning (4702) #define __TBB_MSVC_UNREACHABLE_CODE_IGNORED 1 #endif // Many OS versions (Android 4.0.[0-3] for example) need workaround for dlopen to avoid non-recursive loader lock hang // Setting the workaround for all compile targets ($APP_PLATFORM) below Android 4.4 (android-19) #if __ANDROID__ #include #endif #define __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING (TBB_PREVIEW_FLOW_GRAPH_FEATURES) #ifndef __TBB_PREVIEW_CRITICAL_TASKS #define __TBB_PREVIEW_CRITICAL_TASKS 1 #endif #ifndef __TBB_PREVIEW_FLOW_GRAPH_NODE_SET #define __TBB_PREVIEW_FLOW_GRAPH_NODE_SET (TBB_PREVIEW_FLOW_GRAPH_FEATURES) #endif #if TBB_PREVIEW_CONCURRENT_HASH_MAP_EXTENSIONS #define __TBB_PREVIEW_CONCURRENT_HASH_MAP_EXTENSIONS 1 #endif #if TBB_PREVIEW_TASK_GROUP_EXTENSIONS || __TBB_BUILD #define __TBB_PREVIEW_TASK_GROUP_EXTENSIONS 1 #endif #endif // __TBB_detail__config_H _containers_helpers.h000066400000000000000000000051031514453371700341610ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #ifndef __TBB_detail__containers_helpers_H #define __TBB_detail__containers_helpers_H #include "_template_helpers.h" #include "_allocator_traits.h" #include #include #include namespace tbb { namespace detail { inline namespace d0 { template struct comp_is_transparent : std::false_type {}; template struct comp_is_transparent> : std::true_type {}; template struct has_transparent_key_equal : std::false_type { using type = KeyEqual; }; template struct has_transparent_key_equal> : std::true_type { using type = typename Hasher::transparent_key_equal; static_assert(comp_is_transparent::value, "Hash::transparent_key_equal::is_transparent is not valid or does not denote a type."); static_assert((std::is_same>::value || std::is_same::value), "KeyEqual is a different type than equal_to or Hash::transparent_key_equal."); }; struct is_iterator_impl { template using iter_traits_category = typename std::iterator_traits::iterator_category; template using input_iter_category = typename std::enable_if>::value>::type; }; // struct is_iterator_impl template using is_input_iterator = supports; #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT template inline constexpr bool is_input_iterator_v = is_input_iterator::value; #endif } // inline namespace d0 } // namespace detail } // namespace tbb #endif // __TBB_detail__containers_helpers_H level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/_exception.h000066400000000000000000000045701514453371700323560ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB__exception_H #define __TBB__exception_H #include "_config.h" #include // std::bad_alloc #include // std::exception #include // std::runtime_error namespace tbb { namespace detail { inline namespace d0 { enum class exception_id { bad_alloc = 1, bad_last_alloc, user_abort, nonpositive_step, out_of_range, reservation_length_error, missing_wait, invalid_load_factor, invalid_key, bad_tagged_msg_cast, unsafe_wait, last_entry }; } // namespace d0 #if _MSC_VER #pragma warning(disable: 4275) #endif namespace r1 { //! Exception for concurrent containers class TBB_EXPORT bad_last_alloc : public std::bad_alloc { public: const char* __TBB_EXPORTED_METHOD what() const noexcept(true) override; }; //! Exception for user-initiated abort class TBB_EXPORT user_abort : public std::exception { public: const char* __TBB_EXPORTED_METHOD what() const noexcept(true) override; }; //! Exception for missing wait on structured_task_group class TBB_EXPORT missing_wait : public std::exception { public: const char* __TBB_EXPORTED_METHOD what() const noexcept(true) override; }; //! Exception for impossible finalization of task_sheduler_handle class TBB_EXPORT unsafe_wait : public std::runtime_error { public: unsafe_wait(const char* msg) : std::runtime_error(msg) {} }; //! Gathers all throw operators in one place. /** Its purpose is to minimize code bloat that can be caused by throw operators scattered in multiple places, especially in templates. 
**/ TBB_EXPORT void __TBB_EXPORTED_FUNC throw_exception ( exception_id ); } // namespace r1 inline namespace d0 { using r1::throw_exception; } // namespace d0 } // namespace detail } // namespace tbb #endif // __TBB__exception_H level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/_export.h000066400000000000000000000022571514453371700317010ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_detail__export_H #define __TBB_detail__export_H #if defined(__MINGW32__) #define _EXPORT __declspec(dllexport) #elif defined(_WIN32) || defined(__unix__) || defined(__APPLE__) // Use .def files for these #define _EXPORT #else #error "Unknown platform/compiler" #endif #if __TBB_BUILD #define TBB_EXPORT _EXPORT #else #define TBB_EXPORT #endif #if __TBBMALLOC_BUILD #define TBBMALLOC_EXPORT _EXPORT #else #define TBBMALLOC_EXPORT #endif #if __TBBBIND_BUILD #define TBBBIND_EXPORT _EXPORT #else #define TBBBIND_EXPORT #endif #endif _flow_graph_body_impl.h000066400000000000000000000316371514453371700344730ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB__flow_graph_body_impl_H #define __TBB__flow_graph_body_impl_H #ifndef __TBB_flow_graph_H #error Do not #include this internal file directly; use public TBB headers instead. #endif // included in namespace tbb::detail::d1 (in flow_graph.h) typedef std::uint64_t tag_value; // TODO revamp: find out if there is already helper for has_policy. template struct Policy {}; template struct has_policy; template struct has_policy : std::integral_constant::value || has_policy::value> {}; template struct has_policy : std::integral_constant::value> {}; template struct has_policy > : has_policy {}; namespace graph_policy_namespace { struct rejecting { }; struct reserving { }; struct queueing { }; struct lightweight { }; // K == type of field used for key-matching. Each tag-matching port will be provided // functor that, given an object accepted by the port, will return the /// field of type K being used for matching. template::type > > __TBB_requires(tbb::detail::hash_compare) struct key_matching { typedef K key_type; typedef typename std::decay::type base_key_type; typedef KHash hash_compare_type; }; // old tag_matching join's new specifier typedef key_matching tag_matching; // Aliases for Policy combinations typedef Policy queueing_lightweight; typedef Policy rejecting_lightweight; } // namespace graph_policy_namespace // -------------- function_body containers ---------------------- //! 
A functor that takes no input and generates a value of type Output template< typename Output > class input_body : no_assign { public: virtual ~input_body() {} virtual Output operator()(flow_control& fc) = 0; virtual input_body* clone() = 0; }; //! The leaf for input_body template< typename Output, typename Body> class input_body_leaf : public input_body { public: input_body_leaf( const Body &_body ) : body(_body) { } Output operator()(flow_control& fc) override { return body(fc); } input_body_leaf* clone() override { return new input_body_leaf< Output, Body >(body); } Body get_body() { return body; } private: Body body; }; //! A functor that takes an Input and generates an Output template< typename Input, typename Output > class function_body : no_assign { public: virtual ~function_body() {} virtual Output operator()(const Input &input) = 0; virtual function_body* clone() = 0; }; //! the leaf for function_body template class function_body_leaf : public function_body< Input, Output > { public: function_body_leaf( const B &_body ) : body(_body) { } Output operator()(const Input &i) override { return body(i); } B get_body() { return body; } function_body_leaf* clone() override { return new function_body_leaf< Input, Output, B >(body); } private: B body; }; //! the leaf for function_body specialized for Input and output of continue_msg template class function_body_leaf< continue_msg, continue_msg, B> : public function_body< continue_msg, continue_msg > { public: function_body_leaf( const B &_body ) : body(_body) { } continue_msg operator()( const continue_msg &i ) override { body(i); return i; } B get_body() { return body; } function_body_leaf* clone() override { return new function_body_leaf< continue_msg, continue_msg, B >(body); } private: B body; }; //! 
the leaf for function_body specialized for Output of continue_msg template class function_body_leaf< Input, continue_msg, B> : public function_body< Input, continue_msg > { public: function_body_leaf( const B &_body ) : body(_body) { } continue_msg operator()(const Input &i) override { body(i); return continue_msg(); } B get_body() { return body; } function_body_leaf* clone() override { return new function_body_leaf< Input, continue_msg, B >(body); } private: B body; }; //! the leaf for function_body specialized for Input of continue_msg template class function_body_leaf< continue_msg, Output, B > : public function_body< continue_msg, Output > { public: function_body_leaf( const B &_body ) : body(_body) { } Output operator()(const continue_msg &i) override { return body(i); } B get_body() { return body; } function_body_leaf* clone() override { return new function_body_leaf< continue_msg, Output, B >(body); } private: B body; }; //! function_body that takes an Input and a set of output ports template class multifunction_body : no_assign { public: virtual ~multifunction_body () {} virtual void operator()(const Input &/* input*/, OutputSet &/*oset*/) = 0; virtual multifunction_body* clone() = 0; virtual void* get_body_ptr() = 0; }; //! leaf for multifunction. OutputSet can be a std::tuple or a vector. template class multifunction_body_leaf : public multifunction_body { public: multifunction_body_leaf(const B &_body) : body(_body) { } void operator()(const Input &input, OutputSet &oset) override { body(input, oset); // body may explicitly put() to one or more of oset. } void* get_body_ptr() override { return &body; } multifunction_body_leaf* clone() override { return new multifunction_body_leaf(body); } private: B body; }; // ------ function bodies for hash_buffers and key-matching joins. 
template class type_to_key_function_body : no_assign { public: virtual ~type_to_key_function_body() {} virtual Output operator()(const Input &input) = 0; // returns an Output virtual type_to_key_function_body* clone() = 0; }; // specialization for ref output template class type_to_key_function_body : no_assign { public: virtual ~type_to_key_function_body() {} virtual const Output & operator()(const Input &input) = 0; // returns a const Output& virtual type_to_key_function_body* clone() = 0; }; template class type_to_key_function_body_leaf : public type_to_key_function_body { public: type_to_key_function_body_leaf( const B &_body ) : body(_body) { } Output operator()(const Input &i) override { return body(i); } type_to_key_function_body_leaf* clone() override { return new type_to_key_function_body_leaf< Input, Output, B>(body); } private: B body; }; template class type_to_key_function_body_leaf : public type_to_key_function_body< Input, Output&> { public: type_to_key_function_body_leaf( const B &_body ) : body(_body) { } const Output& operator()(const Input &i) override { return body(i); } type_to_key_function_body_leaf* clone() override { return new type_to_key_function_body_leaf< Input, Output&, B>(body); } private: B body; }; // --------------------------- end of function_body containers ------------------------ // --------------------------- node task bodies --------------------------------------- //! 
A task that calls a node's forward_task function template< typename NodeType > class forward_task_bypass : public graph_task { NodeType &my_node; public: forward_task_bypass( graph& g, small_object_allocator& allocator, NodeType &n , node_priority_t node_priority = no_priority ) : graph_task(g, allocator, node_priority), my_node(n) {} task* execute(execution_data& ed) override { graph_task* next_task = my_node.forward_task(); if (SUCCESSFULLY_ENQUEUED == next_task) next_task = nullptr; else if (next_task) next_task = prioritize_task(my_node.graph_reference(), *next_task); finalize(ed); return next_task; } task* cancel(execution_data& ed) override { finalize(ed); return nullptr; } }; //! A task that calls a node's apply_body_bypass function, passing in an input of type Input // return the task* unless it is SUCCESSFULLY_ENQUEUED, in which case return NULL template< typename NodeType, typename Input > class apply_body_task_bypass : public graph_task { NodeType &my_node; Input my_input; public: apply_body_task_bypass( graph& g, small_object_allocator& allocator, NodeType &n, const Input &i , node_priority_t node_priority = no_priority ) : graph_task(g, allocator, node_priority), my_node(n), my_input(i) {} task* execute(execution_data& ed) override { graph_task* next_task = my_node.apply_body_bypass( my_input ); if (SUCCESSFULLY_ENQUEUED == next_task) next_task = nullptr; else if (next_task) next_task = prioritize_task(my_node.graph_reference(), *next_task); finalize(ed); return next_task; } task* cancel(execution_data& ed) override { finalize(ed); return nullptr; } }; //! 
A task that calls a node's apply_body_bypass function with no input template< typename NodeType > class input_node_task_bypass : public graph_task { NodeType &my_node; public: input_node_task_bypass( graph& g, small_object_allocator& allocator, NodeType &n ) : graph_task(g, allocator), my_node(n) {} task* execute(execution_data& ed) override { graph_task* next_task = my_node.apply_body_bypass( ); if (SUCCESSFULLY_ENQUEUED == next_task) next_task = nullptr; else if (next_task) next_task = prioritize_task(my_node.graph_reference(), *next_task); finalize(ed); return next_task; } task* cancel(execution_data& ed) override { finalize(ed); return nullptr; } }; // ------------------------ end of node task bodies ----------------------------------- template class threshold_regulator; template class threshold_regulator::value>::type> : public receiver, no_copy { T* my_node; protected: graph_task* try_put_task( const DecrementType& value ) override { graph_task* result = my_node->decrement_counter( value ); if( !result ) result = SUCCESSFULLY_ENQUEUED; return result; } graph& graph_reference() const override { return my_node->my_graph; } template friend class limiter_node; void reset_receiver( reset_flags ) {} public: threshold_regulator(T* owner) : my_node(owner) { // Do not work with the passed pointer here as it may not be fully initialized yet } }; template class threshold_regulator : public continue_receiver, no_copy { T *my_node; graph_task* execute() override { return my_node->decrement_counter( 1 ); } protected: graph& graph_reference() const override { return my_node->my_graph; } public: typedef continue_msg input_type; typedef continue_msg output_type; threshold_regulator(T* owner) : continue_receiver( /*number_of_predecessors=*/0, no_priority ), my_node(owner) { // Do not work with the passed pointer here as it may not be fully initialized yet } }; #endif // __TBB__flow_graph_body_impl_H 
_flow_graph_cache_impl.h000066400000000000000000000324571514453371700346020ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB__flow_graph_cache_impl_H #define __TBB__flow_graph_cache_impl_H #ifndef __TBB_flow_graph_H #error Do not #include this internal file directly; use public TBB headers instead. #endif // included in namespace tbb::detail::d1 (in flow_graph.h) //! A node_cache maintains a std::queue of elements of type T. Each operation is protected by a lock. 
template< typename T, typename M=spin_mutex > class node_cache { public: typedef size_t size_type; bool empty() { typename mutex_type::scoped_lock lock( my_mutex ); return internal_empty(); } void add( T &n ) { typename mutex_type::scoped_lock lock( my_mutex ); internal_push(n); } void remove( T &n ) { typename mutex_type::scoped_lock lock( my_mutex ); for ( size_t i = internal_size(); i != 0; --i ) { T &s = internal_pop(); if ( &s == &n ) break; // only remove one predecessor per request internal_push(s); } } void clear() { while( !my_q.empty()) (void)my_q.pop(); } protected: typedef M mutex_type; mutex_type my_mutex; std::queue< T * > my_q; // Assumes lock is held inline bool internal_empty( ) { return my_q.empty(); } // Assumes lock is held inline size_type internal_size( ) { return my_q.size(); } // Assumes lock is held inline void internal_push( T &n ) { my_q.push(&n); } // Assumes lock is held inline T &internal_pop() { T *v = my_q.front(); my_q.pop(); return *v; } }; //! A cache of predecessors that only supports try_get template< typename T, typename M=spin_mutex > class predecessor_cache : public node_cache< sender, M > { public: typedef M mutex_type; typedef T output_type; typedef sender predecessor_type; typedef receiver successor_type; predecessor_cache( successor_type* owner ) : my_owner( owner ) { __TBB_ASSERT( my_owner, "predecessor_cache should have an owner." 
); // Do not work with the passed pointer here as it may not be fully initialized yet } bool get_item( output_type& v ) { bool msg = false; do { predecessor_type *src; { typename mutex_type::scoped_lock lock(this->my_mutex); if ( this->internal_empty() ) { break; } src = &this->internal_pop(); } // Try to get from this sender msg = src->try_get( v ); if (msg == false) { // Relinquish ownership of the edge register_successor(*src, *my_owner); } else { // Retain ownership of the edge this->add(*src); } } while ( msg == false ); return msg; } // If we are removing arcs (rf_clear_edges), call clear() rather than reset(). void reset() { for(;;) { predecessor_type *src; { if (this->internal_empty()) break; src = &this->internal_pop(); } register_successor(*src, *my_owner); } } protected: successor_type* my_owner; }; //! An cache of predecessors that supports requests and reservations template< typename T, typename M=spin_mutex > class reservable_predecessor_cache : public predecessor_cache< T, M > { public: typedef M mutex_type; typedef T output_type; typedef sender predecessor_type; typedef receiver successor_type; reservable_predecessor_cache( successor_type* owner ) : predecessor_cache(owner), reserved_src(nullptr) { // Do not work with the passed pointer here as it may not be fully initialized yet } bool try_reserve( output_type &v ) { bool msg = false; do { predecessor_type* pred = nullptr; { typename mutex_type::scoped_lock lock(this->my_mutex); if ( reserved_src.load(std::memory_order_relaxed) || this->internal_empty() ) return false; pred = &this->internal_pop(); reserved_src.store(pred, std::memory_order_relaxed); } // Try to get from this sender msg = pred->try_reserve( v ); if (msg == false) { typename mutex_type::scoped_lock lock(this->my_mutex); // Relinquish ownership of the edge register_successor( *pred, *this->my_owner ); reserved_src.store(nullptr, std::memory_order_relaxed); } else { // Retain ownership of the edge this->add( *pred); } } while ( msg == 
false ); return msg; } bool try_release() { reserved_src.load(std::memory_order_relaxed)->try_release(); reserved_src.store(nullptr, std::memory_order_relaxed); return true; } bool try_consume() { reserved_src.load(std::memory_order_relaxed)->try_consume(); reserved_src.store(nullptr, std::memory_order_relaxed); return true; } void reset() { reserved_src.store(nullptr, std::memory_order_relaxed); predecessor_cache::reset(); } void clear() { reserved_src.store(nullptr, std::memory_order_relaxed); predecessor_cache::clear(); } private: std::atomic reserved_src; }; //! An abstract cache of successors template class successor_cache : no_copy { protected: typedef M mutex_type; mutex_type my_mutex; typedef receiver successor_type; typedef receiver* pointer_type; typedef sender owner_type; // TODO revamp: introduce heapified collection of successors for strict priorities typedef std::list< pointer_type > successors_type; successors_type my_successors; owner_type* my_owner; public: successor_cache( owner_type* owner ) : my_owner(owner) { // Do not work with the passed pointer here as it may not be fully initialized yet } virtual ~successor_cache() {} void register_successor( successor_type& r ) { typename mutex_type::scoped_lock l(my_mutex, true); if( r.priority() != no_priority ) my_successors.push_front( &r ); else my_successors.push_back( &r ); } void remove_successor( successor_type& r ) { typename mutex_type::scoped_lock l(my_mutex, true); for ( typename successors_type::iterator i = my_successors.begin(); i != my_successors.end(); ++i ) { if ( *i == & r ) { my_successors.erase(i); break; } } } bool empty() { typename mutex_type::scoped_lock l(my_mutex, false); return my_successors.empty(); } void clear() { my_successors.clear(); } virtual graph_task* try_put_task( const T& t ) = 0; }; // successor_cache //! 
An abstract cache of successors, specialized to continue_msg template class successor_cache< continue_msg, M > : no_copy { protected: typedef M mutex_type; mutex_type my_mutex; typedef receiver successor_type; typedef receiver* pointer_type; typedef sender owner_type; typedef std::list< pointer_type > successors_type; successors_type my_successors; owner_type* my_owner; public: successor_cache( sender* owner ) : my_owner(owner) { // Do not work with the passed pointer here as it may not be fully initialized yet } virtual ~successor_cache() {} void register_successor( successor_type& r ) { typename mutex_type::scoped_lock l(my_mutex, true); if( r.priority() != no_priority ) my_successors.push_front( &r ); else my_successors.push_back( &r ); __TBB_ASSERT( my_owner, "Cache of successors must have an owner." ); if ( r.is_continue_receiver() ) { r.register_predecessor( *my_owner ); } } void remove_successor( successor_type& r ) { typename mutex_type::scoped_lock l(my_mutex, true); for ( successors_type::iterator i = my_successors.begin(); i != my_successors.end(); ++i ) { if ( *i == &r ) { __TBB_ASSERT(my_owner, "Cache of successors must have an owner."); // TODO: check if we need to test for continue_receiver before removing from r. r.remove_predecessor( *my_owner ); my_successors.erase(i); break; } } } bool empty() { typename mutex_type::scoped_lock l(my_mutex, false); return my_successors.empty(); } void clear() { my_successors.clear(); } virtual graph_task* try_put_task( const continue_msg& t ) = 0; }; // successor_cache< continue_msg > //! 
A cache of successors that are broadcast to template class broadcast_cache : public successor_cache { typedef successor_cache base_type; typedef M mutex_type; typedef typename successor_cache::successors_type successors_type; public: broadcast_cache( typename base_type::owner_type* owner ): base_type(owner) { // Do not work with the passed pointer here as it may not be fully initialized yet } // as above, but call try_put_task instead, and return the last task we received (if any) graph_task* try_put_task( const T &t ) override { graph_task * last_task = nullptr; typename mutex_type::scoped_lock l(this->my_mutex, /*write=*/true); typename successors_type::iterator i = this->my_successors.begin(); while ( i != this->my_successors.end() ) { graph_task *new_task = (*i)->try_put_task(t); // workaround for icc bug graph& graph_ref = (*i)->graph_reference(); last_task = combine_tasks(graph_ref, last_task, new_task); // enqueue if necessary if(new_task) { ++i; } else { // failed if ( (*i)->register_predecessor(*this->my_owner) ) { i = this->my_successors.erase(i); } else { ++i; } } } return last_task; } // call try_put_task and return list of received tasks bool gather_successful_try_puts( const T &t, graph_task_list& tasks ) { bool is_at_least_one_put_successful = false; typename mutex_type::scoped_lock l(this->my_mutex, /*write=*/true); typename successors_type::iterator i = this->my_successors.begin(); while ( i != this->my_successors.end() ) { graph_task * new_task = (*i)->try_put_task(t); if(new_task) { ++i; if(new_task != SUCCESSFULLY_ENQUEUED) { tasks.push_back(*new_task); } is_at_least_one_put_successful = true; } else { // failed if ( (*i)->register_predecessor(*this->my_owner) ) { i = this->my_successors.erase(i); } else { ++i; } } } return is_at_least_one_put_successful; } }; //! 
A cache of successors that are put in a round-robin fashion template class round_robin_cache : public successor_cache { typedef successor_cache base_type; typedef size_t size_type; typedef M mutex_type; typedef typename successor_cache::successors_type successors_type; public: round_robin_cache( typename base_type::owner_type* owner ): base_type(owner) { // Do not work with the passed pointer here as it may not be fully initialized yet } size_type size() { typename mutex_type::scoped_lock l(this->my_mutex, false); return this->my_successors.size(); } graph_task* try_put_task( const T &t ) override { typename mutex_type::scoped_lock l(this->my_mutex, /*write=*/true); typename successors_type::iterator i = this->my_successors.begin(); while ( i != this->my_successors.end() ) { graph_task* new_task = (*i)->try_put_task(t); if ( new_task ) { return new_task; } else { if ( (*i)->register_predecessor(*this->my_owner) ) { i = this->my_successors.erase(i); } else { ++i; } } } return NULL; } }; #endif // __TBB__flow_graph_cache_impl_H _flow_graph_impl.h000066400000000000000000000360401514453371700334470ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #ifndef __TBB_flow_graph_impl_H #define __TBB_flow_graph_impl_H // #include "../config.h" #include "_task.h" #include "../task_group.h" #include "../task_arena.h" #include "../flow_graph_abstractions.h" #include "../concurrent_priority_queue.h" #include namespace tbb { namespace detail { namespace d1 { class graph_task; static graph_task* const SUCCESSFULLY_ENQUEUED = (graph_task*)-1; typedef unsigned int node_priority_t; static const node_priority_t no_priority = node_priority_t(0); class graph; class graph_node; template class graph_iterator { friend class graph; friend class graph_node; public: typedef size_t size_type; typedef GraphNodeType value_type; typedef GraphNodeType* pointer; typedef GraphNodeType& reference; typedef const GraphNodeType& const_reference; typedef std::forward_iterator_tag iterator_category; //! Copy constructor graph_iterator(const graph_iterator& other) : my_graph(other.my_graph), current_node(other.current_node) {} //! Assignment graph_iterator& operator=(const graph_iterator& other) { if (this != &other) { my_graph = other.my_graph; current_node = other.current_node; } return *this; } //! Dereference reference operator*() const; //! Dereference pointer operator->() const; //! Equality bool operator==(const graph_iterator& other) const { return ((my_graph == other.my_graph) && (current_node == other.current_node)); } #if !__TBB_CPP20_COMPARISONS_PRESENT //! Inequality bool operator!=(const graph_iterator& other) const { return !(operator==(other)); } #endif //! Pre-increment graph_iterator& operator++() { internal_forward(); return *this; } //! Post-increment graph_iterator operator++(int) { graph_iterator result = *this; operator++(); return result; } private: // the graph over which we are iterating GraphContainerType *my_graph; // pointer into my_graph's my_nodes list pointer current_node; //! 
Private initializing constructor for begin() and end() iterators graph_iterator(GraphContainerType *g, bool begin); void internal_forward(); }; // class graph_iterator // flags to modify the behavior of the graph reset(). Can be combined. enum reset_flags { rf_reset_protocol = 0, rf_reset_bodies = 1 << 0, // delete the current node body, reset to a copy of the initial node body. rf_clear_edges = 1 << 1 // delete edges }; void activate_graph(graph& g); void deactivate_graph(graph& g); bool is_graph_active(graph& g); graph_task* prioritize_task(graph& g, graph_task& arena_task); void spawn_in_graph_arena(graph& g, graph_task& arena_task); void enqueue_in_graph_arena(graph &g, graph_task& arena_task); class graph; //! Base class for tasks generated by graph nodes. class graph_task : public task { public: graph_task(graph& g, small_object_allocator& allocator , node_priority_t node_priority = no_priority ) : my_graph(g) , priority(node_priority) , my_allocator(allocator) {} graph& my_graph; // graph instance the task belongs to // TODO revamp: rename to my_priority node_priority_t priority; template void destruct_and_deallocate(const execution_data& ed); protected: template void finalize(const execution_data& ed); private: // To organize task_list graph_task* my_next{ nullptr }; small_object_allocator my_allocator; // TODO revamp: elaborate internal interfaces to avoid friends declarations friend class graph_task_list; friend graph_task* prioritize_task(graph& g, graph_task& gt); }; struct graph_task_comparator { bool operator()(const graph_task* left, const graph_task* right) { return left->priority < right->priority; } }; typedef tbb::concurrent_priority_queue graph_task_priority_queue_t; class priority_task_selector : public task { public: priority_task_selector(graph_task_priority_queue_t& priority_queue, small_object_allocator& allocator) : my_priority_queue(priority_queue), my_allocator(allocator), my_task() {} task* execute(execution_data& ed) override { 
next_task(); __TBB_ASSERT(my_task, nullptr); task* t_next = my_task->execute(ed); my_allocator.delete_object(this, ed); return t_next; } task* cancel(execution_data& ed) override { if (!my_task) { next_task(); } __TBB_ASSERT(my_task, nullptr); task* t_next = my_task->cancel(ed); my_allocator.delete_object(this, ed); return t_next; } private: void next_task() { // TODO revamp: hold functors in priority queue instead of real tasks bool result = my_priority_queue.try_pop(my_task); __TBB_ASSERT_EX(result, "Number of critical tasks for scheduler and tasks" " in graph's priority queue mismatched"); __TBB_ASSERT(my_task && my_task != SUCCESSFULLY_ENQUEUED, "Incorrect task submitted to graph priority queue"); __TBB_ASSERT(my_task->priority != no_priority, "Tasks from graph's priority queue must have priority"); } graph_task_priority_queue_t& my_priority_queue; small_object_allocator my_allocator; graph_task* my_task; }; template class run_and_put_task; template class run_task; //******************************************************************************** // graph tasks helpers //******************************************************************************** //! The list of graph tasks class graph_task_list : no_copy { private: graph_task* my_first; graph_task** my_next_ptr; public: //! Construct empty list graph_task_list() : my_first(nullptr), my_next_ptr(&my_first) {} //! True if list is empty; false otherwise. bool empty() const { return !my_first; } //! Push task onto back of list. void push_back(graph_task& task) { task.my_next = nullptr; *my_next_ptr = &task; my_next_ptr = &task.my_next; } //! Pop the front task from the list. graph_task& pop_front() { __TBB_ASSERT(!empty(), "attempt to pop item from empty task_list"); graph_task* result = my_first; my_first = result->my_next; if (!my_first) { my_next_ptr = &my_first; } return *result; } }; //! 
The graph class /** This class serves as a handle to the graph */ class graph : no_copy, public graph_proxy { friend class graph_node; void prepare_task_arena(bool reinit = false) { if (reinit) { __TBB_ASSERT(my_task_arena, "task arena is NULL"); my_task_arena->terminate(); my_task_arena->initialize(task_arena::attach()); } else { __TBB_ASSERT(my_task_arena == NULL, "task arena is not NULL"); my_task_arena = new task_arena(task_arena::attach()); } if (!my_task_arena->is_active()) // failed to attach my_task_arena->initialize(); // create a new, default-initialized arena __TBB_ASSERT(my_task_arena->is_active(), "task arena is not active"); } public: //! Constructs a graph with isolated task_group_context graph(); //! Constructs a graph with use_this_context as context explicit graph(task_group_context& use_this_context); //! Destroys the graph. /** Calls wait_for_all, then destroys the root task and context. */ ~graph(); //! Used to register that an external entity may still interact with the graph. /** The graph will not return from wait_for_all until a matching number of release_wait calls is made. */ void reserve_wait() override; //! Deregisters an external entity that may have interacted with the graph. /** The graph will not return from wait_for_all until all the number of reserve_wait calls matches the number of release_wait calls. */ void release_wait() override; //! Wait until graph is idle and the number of release_wait calls equals to the number of //! reserve_wait calls. /** The waiting thread will go off and steal work while it is blocked in the wait_for_all. 
*/ void wait_for_all() { cancelled = false; caught_exception = false; try_call([this] { my_task_arena->execute([this] { wait(my_wait_context, *my_context); }); cancelled = my_context->is_group_execution_cancelled(); }).on_exception([this] { my_context->reset(); caught_exception = true; cancelled = true; }); // TODO: the "if" condition below is just a work-around to support the concurrent wait // mode. The cancellation and exception mechanisms are still broken in this mode. // Consider using task group not to re-implement the same functionality. if (!(my_context->traits() & task_group_context::concurrent_wait)) { my_context->reset(); // consistent with behavior in catch() } } // TODO revamp: consider adding getter for task_group_context. // ITERATORS template friend class graph_iterator; // Graph iterator typedefs typedef graph_iterator iterator; typedef graph_iterator const_iterator; // Graph iterator constructors //! start iterator iterator begin(); //! end iterator iterator end(); //! start const iterator const_iterator begin() const; //! end const iterator const_iterator end() const; //! start const iterator const_iterator cbegin() const; //! end const iterator const_iterator cend() const; // thread-unsafe state reset. void reset(reset_flags f = rf_reset_protocol); //! cancels execution of the associated task_group_context void cancel(); //! 
return status of graph execution bool is_cancelled() { return cancelled; } bool exception_thrown() { return caught_exception; } private: wait_context my_wait_context; task_group_context *my_context; bool own_context; bool cancelled; bool caught_exception; bool my_is_active; graph_node *my_nodes, *my_nodes_last; tbb::spin_mutex nodelist_mutex; void register_node(graph_node *n); void remove_node(graph_node *n); task_arena* my_task_arena; graph_task_priority_queue_t my_priority_queue; friend void activate_graph(graph& g); friend void deactivate_graph(graph& g); friend bool is_graph_active(graph& g); friend graph_task* prioritize_task(graph& g, graph_task& arena_task); friend void spawn_in_graph_arena(graph& g, graph_task& arena_task); friend void enqueue_in_graph_arena(graph &g, graph_task& arena_task); friend class task_arena_base; }; // class graph template inline void graph_task::destruct_and_deallocate(const execution_data& ed) { auto allocator = my_allocator; // TODO: investigate if direct call of derived destructor gives any benefits. this->~graph_task(); allocator.deallocate(static_cast(this), ed); } template inline void graph_task::finalize(const execution_data& ed) { graph& g = my_graph; destruct_and_deallocate(ed); g.release_wait(); } //******************************************************************************** // end of graph tasks helpers //******************************************************************************** #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET class get_graph_helper; #endif //! The base of all graph nodes. class graph_node : no_copy { friend class graph; template friend class graph_iterator; #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET friend class get_graph_helper; #endif protected: graph& my_graph; graph& graph_reference() const { // TODO revamp: propagate graph_reference() method to all the reference places. 
return my_graph; } graph_node* next = nullptr; graph_node* prev = nullptr; public: explicit graph_node(graph& g); virtual ~graph_node(); protected: // performs the reset on an individual node. virtual void reset_node(reset_flags f = rf_reset_protocol) = 0; }; // class graph_node inline void activate_graph(graph& g) { g.my_is_active = true; } inline void deactivate_graph(graph& g) { g.my_is_active = false; } inline bool is_graph_active(graph& g) { return g.my_is_active; } inline graph_task* prioritize_task(graph& g, graph_task& gt) { if( no_priority == gt.priority ) return > //! Non-preemptive priority pattern. The original task is submitted as a work item to the //! priority queue, and a new critical task is created to take and execute a work item with //! the highest known priority. The reference counting responsibility is transferred (via //! allocate_continuation) to the new task. task* critical_task = gt.my_allocator.new_object(g.my_priority_queue, gt.my_allocator); __TBB_ASSERT( critical_task, "bad_alloc?" ); g.my_priority_queue.push(>); using tbb::detail::d1::submit; submit( *critical_task, *g.my_task_arena, *g.my_context, /*as_critical=*/true ); return nullptr; } //! Spawns a task inside graph arena inline void spawn_in_graph_arena(graph& g, graph_task& arena_task) { if (is_graph_active(g)) { task* gt = prioritize_task(g, arena_task); if( !gt ) return; __TBB_ASSERT(g.my_task_arena && g.my_task_arena->is_active(), NULL); submit( *gt, *g.my_task_arena, *g.my_context #if __TBB_PREVIEW_CRITICAL_TASKS , /*as_critical=*/false #endif ); } } // TODO revamp: unify *_in_graph_arena functions //! Enqueues a task inside graph arena inline void enqueue_in_graph_arena(graph &g, graph_task& arena_task) { if (is_graph_active(g)) { __TBB_ASSERT( g.my_task_arena && g.my_task_arena->is_active(), "Is graph's arena initialized and active?" 
); // TODO revamp: decide on the approach that does not postpone critical task if( task* gt = prioritize_task(g, arena_task) ) submit( *gt, *g.my_task_arena, *g.my_context, /*as_critical=*/false); } } } // namespace d1 } // namespace detail } // namespace tbb #endif // __TBB_flow_graph_impl_H _flow_graph_indexer_impl.h000066400000000000000000000402211514453371700351610ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB__flow_graph_indexer_impl_H #define __TBB__flow_graph_indexer_impl_H #ifndef __TBB_flow_graph_H #error Do not #include this internal file directly; use public TBB headers instead. #endif // included in namespace tbb::detail::d1 #include "_flow_graph_types_impl.h" // Output of the indexer_node is a tbb::flow::tagged_msg, and will be of // the form tagged_msg // where the value of tag will indicate which result was put to the // successor. 
template graph_task* do_try_put(const T &v, void *p) { typename IndexerNodeBaseType::output_type o(K, v); return reinterpret_cast(p)->try_put_task(&o); } template struct indexer_helper { template static inline void set_indexer_node_pointer(PortTuple &my_input, IndexerNodeBaseType *p, graph& g) { typedef typename std::tuple_element::type T; graph_task* (*indexer_node_put_task)(const T&, void *) = do_try_put; std::get(my_input).set_up(p, indexer_node_put_task, g); indexer_helper::template set_indexer_node_pointer(my_input, p, g); } }; template struct indexer_helper { template static inline void set_indexer_node_pointer(PortTuple &my_input, IndexerNodeBaseType *p, graph& g) { typedef typename std::tuple_element<0, TupleTypes>::type T; graph_task* (*indexer_node_put_task)(const T&, void *) = do_try_put; std::get<0>(my_input).set_up(p, indexer_node_put_task, g); } }; template class indexer_input_port : public receiver { private: void* my_indexer_ptr; typedef graph_task* (* forward_function_ptr)(T const &, void* ); forward_function_ptr my_try_put_task; graph* my_graph; public: void set_up(void* p, forward_function_ptr f, graph& g) { my_indexer_ptr = p; my_try_put_task = f; my_graph = &g; } protected: template< typename R, typename B > friend class run_and_put_task; template friend class broadcast_cache; template friend class round_robin_cache; graph_task* try_put_task(const T &v) override { return my_try_put_task(v, my_indexer_ptr); } graph& graph_reference() const override { return *my_graph; } }; template class indexer_node_FE { public: static const int N = std::tuple_size::value; typedef OutputType output_type; typedef InputTuple input_type; // Some versions of Intel(R) C++ Compiler fail to generate an implicit constructor for the class which has std::tuple as a member. indexer_node_FE() : my_inputs() {} input_type &input_ports() { return my_inputs; } protected: input_type my_inputs; }; //! 
indexer_node_base template class indexer_node_base : public graph_node, public indexer_node_FE, public sender { protected: using graph_node::my_graph; public: static const size_t N = std::tuple_size::value; typedef OutputType output_type; typedef StructTypes tuple_types; typedef typename sender::successor_type successor_type; typedef indexer_node_FE input_ports_type; private: // ----------- Aggregator ------------ enum op_type { reg_succ, rem_succ, try__put_task }; typedef indexer_node_base class_type; class indexer_node_base_operation : public aggregated_operation { public: char type; union { output_type const *my_arg; successor_type *my_succ; graph_task* bypass_t; }; indexer_node_base_operation(const output_type* e, op_type t) : type(char(t)), my_arg(e) {} indexer_node_base_operation(const successor_type &s, op_type t) : type(char(t)), my_succ(const_cast(&s)) {} }; typedef aggregating_functor handler_type; friend class aggregating_functor; aggregator my_aggregator; void handle_operations(indexer_node_base_operation* op_list) { indexer_node_base_operation *current; while(op_list) { current = op_list; op_list = op_list->next; switch(current->type) { case reg_succ: my_successors.register_successor(*(current->my_succ)); current->status.store( SUCCEEDED, std::memory_order_release); break; case rem_succ: my_successors.remove_successor(*(current->my_succ)); current->status.store( SUCCEEDED, std::memory_order_release); break; case try__put_task: { current->bypass_t = my_successors.try_put_task(*(current->my_arg)); current->status.store( SUCCEEDED, std::memory_order_release); // return of try_put_task actual return value } break; } } } // ---------- end aggregator ----------- public: indexer_node_base(graph& g) : graph_node(g), input_ports_type(), my_successors(this) { indexer_helper::set_indexer_node_pointer(this->my_inputs, this, g); my_aggregator.initialize_handler(handler_type(this)); } indexer_node_base(const indexer_node_base& other) : graph_node(other.my_graph), 
input_ports_type(), sender(), my_successors(this) { indexer_helper::set_indexer_node_pointer(this->my_inputs, this, other.my_graph); my_aggregator.initialize_handler(handler_type(this)); } bool register_successor(successor_type &r) override { indexer_node_base_operation op_data(r, reg_succ); my_aggregator.execute(&op_data); return op_data.status == SUCCEEDED; } bool remove_successor( successor_type &r) override { indexer_node_base_operation op_data(r, rem_succ); my_aggregator.execute(&op_data); return op_data.status == SUCCEEDED; } graph_task* try_put_task(output_type const *v) { // not a virtual method in this class indexer_node_base_operation op_data(v, try__put_task); my_aggregator.execute(&op_data); return op_data.bypass_t; } protected: void reset_node(reset_flags f) override { if(f & rf_clear_edges) { my_successors.clear(); } } private: broadcast_cache my_successors; }; //indexer_node_base template struct input_types; template struct input_types<1, InputTuple> { typedef typename std::tuple_element<0, InputTuple>::type first_type; typedef tagged_msg type; }; template struct input_types<2, InputTuple> { typedef typename std::tuple_element<0, InputTuple>::type first_type; typedef typename std::tuple_element<1, InputTuple>::type second_type; typedef tagged_msg type; }; template struct input_types<3, InputTuple> { typedef typename std::tuple_element<0, InputTuple>::type first_type; typedef typename std::tuple_element<1, InputTuple>::type second_type; typedef typename std::tuple_element<2, InputTuple>::type third_type; typedef tagged_msg type; }; template struct input_types<4, InputTuple> { typedef typename std::tuple_element<0, InputTuple>::type first_type; typedef typename std::tuple_element<1, InputTuple>::type second_type; typedef typename std::tuple_element<2, InputTuple>::type third_type; typedef typename std::tuple_element<3, InputTuple>::type fourth_type; typedef tagged_msg type; }; template struct input_types<5, InputTuple> { typedef typename 
std::tuple_element<0, InputTuple>::type first_type; typedef typename std::tuple_element<1, InputTuple>::type second_type; typedef typename std::tuple_element<2, InputTuple>::type third_type; typedef typename std::tuple_element<3, InputTuple>::type fourth_type; typedef typename std::tuple_element<4, InputTuple>::type fifth_type; typedef tagged_msg type; }; template struct input_types<6, InputTuple> { typedef typename std::tuple_element<0, InputTuple>::type first_type; typedef typename std::tuple_element<1, InputTuple>::type second_type; typedef typename std::tuple_element<2, InputTuple>::type third_type; typedef typename std::tuple_element<3, InputTuple>::type fourth_type; typedef typename std::tuple_element<4, InputTuple>::type fifth_type; typedef typename std::tuple_element<5, InputTuple>::type sixth_type; typedef tagged_msg type; }; template struct input_types<7, InputTuple> { typedef typename std::tuple_element<0, InputTuple>::type first_type; typedef typename std::tuple_element<1, InputTuple>::type second_type; typedef typename std::tuple_element<2, InputTuple>::type third_type; typedef typename std::tuple_element<3, InputTuple>::type fourth_type; typedef typename std::tuple_element<4, InputTuple>::type fifth_type; typedef typename std::tuple_element<5, InputTuple>::type sixth_type; typedef typename std::tuple_element<6, InputTuple>::type seventh_type; typedef tagged_msg type; }; template struct input_types<8, InputTuple> { typedef typename std::tuple_element<0, InputTuple>::type first_type; typedef typename std::tuple_element<1, InputTuple>::type second_type; typedef typename std::tuple_element<2, InputTuple>::type third_type; typedef typename std::tuple_element<3, InputTuple>::type fourth_type; typedef typename std::tuple_element<4, InputTuple>::type fifth_type; typedef typename std::tuple_element<5, InputTuple>::type sixth_type; typedef typename std::tuple_element<6, InputTuple>::type seventh_type; typedef typename std::tuple_element<7, InputTuple>::type 
eighth_type; typedef tagged_msg type; }; template struct input_types<9, InputTuple> { typedef typename std::tuple_element<0, InputTuple>::type first_type; typedef typename std::tuple_element<1, InputTuple>::type second_type; typedef typename std::tuple_element<2, InputTuple>::type third_type; typedef typename std::tuple_element<3, InputTuple>::type fourth_type; typedef typename std::tuple_element<4, InputTuple>::type fifth_type; typedef typename std::tuple_element<5, InputTuple>::type sixth_type; typedef typename std::tuple_element<6, InputTuple>::type seventh_type; typedef typename std::tuple_element<7, InputTuple>::type eighth_type; typedef typename std::tuple_element<8, InputTuple>::type nineth_type; typedef tagged_msg type; }; template struct input_types<10, InputTuple> { typedef typename std::tuple_element<0, InputTuple>::type first_type; typedef typename std::tuple_element<1, InputTuple>::type second_type; typedef typename std::tuple_element<2, InputTuple>::type third_type; typedef typename std::tuple_element<3, InputTuple>::type fourth_type; typedef typename std::tuple_element<4, InputTuple>::type fifth_type; typedef typename std::tuple_element<5, InputTuple>::type sixth_type; typedef typename std::tuple_element<6, InputTuple>::type seventh_type; typedef typename std::tuple_element<7, InputTuple>::type eighth_type; typedef typename std::tuple_element<8, InputTuple>::type nineth_type; typedef typename std::tuple_element<9, InputTuple>::type tenth_type; typedef tagged_msg type; }; // type generators template struct indexer_types : public input_types::value, OutputTuple> { static const int N = std::tuple_size::value; typedef typename input_types::type output_type; typedef typename wrap_tuple_elements::type input_ports_type; typedef indexer_node_FE indexer_FE_type; typedef indexer_node_base indexer_base_type; }; template class unfolded_indexer_node : public indexer_types::indexer_base_type { public: typedef typename indexer_types::input_ports_type 
input_ports_type; typedef OutputTuple tuple_types; typedef typename indexer_types::output_type output_type; private: typedef typename indexer_types::indexer_base_type base_type; public: unfolded_indexer_node(graph& g) : base_type(g) {} unfolded_indexer_node(const unfolded_indexer_node &other) : base_type(other) {} }; #endif /* __TBB__flow_graph_indexer_impl_H */ _flow_graph_item_buffer_impl.h000066400000000000000000000241671514453371700360250ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB__flow_graph_item_buffer_impl_H #define __TBB__flow_graph_item_buffer_impl_H #ifndef __TBB_flow_graph_H #error Do not #include this internal file directly; use public TBB headers instead. #endif #include "_aligned_space.h" // in namespace tbb::flow::interfaceX (included in _flow_graph_node_impl.h) //! Expandable buffer of items. The possible operations are push, pop, //* tests for empty and so forth. No mutual exclusion is built in. //* objects are constructed into and explicitly-destroyed. get_my_item gives // a read-only reference to the item in the buffer. set_my_item may be called // with either an empty or occupied slot. 
template > class item_buffer { public: typedef T item_type; enum buffer_item_state { no_item=0, has_item=1, reserved_item=2 }; protected: typedef size_t size_type; typedef std::pair aligned_space_item; typedef aligned_space buffer_item_type; typedef typename allocator_traits::template rebind_alloc allocator_type; buffer_item_type *my_array; size_type my_array_size; static const size_type initial_buffer_size = 4; size_type my_head; size_type my_tail; bool buffer_empty() const { return my_head == my_tail; } aligned_space_item &item(size_type i) { __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->second))%alignment_of::value),NULL); __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->first))%alignment_of::value), NULL); return *my_array[i & (my_array_size - 1) ].begin(); } const aligned_space_item &item(size_type i) const { __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->second))%alignment_of::value), NULL); __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->first))%alignment_of::value), NULL); return *my_array[i & (my_array_size-1)].begin(); } bool my_item_valid(size_type i) const { return (i < my_tail) && (i >= my_head) && (item(i).second != no_item); } #if TBB_USE_ASSERT bool my_item_reserved(size_type i) const { return item(i).second == reserved_item; } #endif // object management in buffer const item_type &get_my_item(size_t i) const { __TBB_ASSERT(my_item_valid(i),"attempt to get invalid item"); item_type* itm = const_cast(reinterpret_cast(&item(i).first)); return *itm; } // may be called with an empty slot or a slot that has already been constructed into. 
void set_my_item(size_t i, const item_type &o) { if(item(i).second != no_item) { destroy_item(i); } new(&(item(i).first)) item_type(o); item(i).second = has_item; } // destructively-fetch an object from the buffer void fetch_item(size_t i, item_type &o) { __TBB_ASSERT(my_item_valid(i), "Trying to fetch an empty slot"); o = get_my_item(i); // could have std::move assign semantics destroy_item(i); } // move an existing item from one slot to another. The moved-to slot must be unoccupied, // the moved-from slot must exist and not be reserved. The after, from will be empty, // to will be occupied but not reserved void move_item(size_t to, size_t from) { __TBB_ASSERT(!my_item_valid(to), "Trying to move to a non-empty slot"); __TBB_ASSERT(my_item_valid(from), "Trying to move from an empty slot"); set_my_item(to, get_my_item(from)); // could have std::move semantics destroy_item(from); } // put an item in an empty slot. Return true if successful, else false bool place_item(size_t here, const item_type &me) { #if !TBB_DEPRECATED_SEQUENCER_DUPLICATES if(my_item_valid(here)) return false; #endif set_my_item(here, me); return true; } // could be implemented with std::move semantics void swap_items(size_t i, size_t j) { __TBB_ASSERT(my_item_valid(i) && my_item_valid(j), "attempt to swap invalid item(s)"); item_type temp = get_my_item(i); set_my_item(i, get_my_item(j)); set_my_item(j, temp); } void destroy_item(size_type i) { __TBB_ASSERT(my_item_valid(i), "destruction of invalid item"); item(i).first.~item_type(); item(i).second = no_item; } // returns the front element const item_type& front() const { __TBB_ASSERT(my_item_valid(my_head), "attempt to fetch head non-item"); return get_my_item(my_head); } // returns the back element const item_type& back() const { __TBB_ASSERT(my_item_valid(my_tail - 1), "attempt to fetch head non-item"); return get_my_item(my_tail - 1); } // following methods are for reservation of the front of a buffer. 
void reserve_item(size_type i) { __TBB_ASSERT(my_item_valid(i) && !my_item_reserved(i), "item cannot be reserved"); item(i).second = reserved_item; } void release_item(size_type i) { __TBB_ASSERT(my_item_reserved(i), "item is not reserved"); item(i).second = has_item; } void destroy_front() { destroy_item(my_head); ++my_head; } void destroy_back() { destroy_item(my_tail-1); --my_tail; } // we have to be able to test against a new tail value without changing my_tail // grow_array doesn't work if we change my_tail when the old array is too small size_type size(size_t new_tail = 0) { return (new_tail ? new_tail : my_tail) - my_head; } size_type capacity() { return my_array_size; } // sequencer_node does not use this method, so we don't // need a version that passes in the new_tail value. bool buffer_full() { return size() >= capacity(); } //! Grows the internal array. void grow_my_array( size_t minimum_size ) { // test that we haven't made the structure inconsistent. __TBB_ASSERT(capacity() >= my_tail - my_head, "total items exceed capacity"); size_type new_size = my_array_size ? 2*my_array_size : initial_buffer_size; while( new_sizesecond = no_item; } for( size_type i=my_head; ifirst); (void)new(new_space) item_type(get_my_item(i)); new_array[i&(new_size-1)].begin()->second = item(i).second; } } clean_up_buffer(/*reset_pointers*/false); my_array = new_array; my_array_size = new_size; } bool push_back(item_type &v) { if(buffer_full()) { grow_my_array(size() + 1); } set_my_item(my_tail, v); ++my_tail; return true; } bool pop_back(item_type &v) { if (!my_item_valid(my_tail-1)) { return false; } v = this->back(); destroy_back(); return true; } bool pop_front(item_type &v) { if(!my_item_valid(my_head)) { return false; } v = this->front(); destroy_front(); return true; } // This is used both for reset and for grow_my_array. In the case of grow_my_array // we want to retain the values of the head and tail. 
void clean_up_buffer(bool reset_pointers) { if (my_array) { for( size_type i=my_head; i > class reservable_item_buffer : public item_buffer { protected: using item_buffer::my_item_valid; using item_buffer::my_head; public: reservable_item_buffer() : item_buffer(), my_reserved(false) {} void reset() {my_reserved = false; item_buffer::reset(); } protected: bool reserve_front(T &v) { if(my_reserved || !my_item_valid(this->my_head)) return false; my_reserved = true; // reserving the head v = this->front(); this->reserve_item(this->my_head); return true; } void consume_front() { __TBB_ASSERT(my_reserved, "Attempt to consume a non-reserved item"); this->destroy_front(); my_reserved = false; } void release_front() { __TBB_ASSERT(my_reserved, "Attempt to release a non-reserved item"); this->release_item(this->my_head); my_reserved = false; } bool my_reserved; }; #endif // __TBB__flow_graph_item_buffer_impl_H _flow_graph_join_impl.h000066400000000000000000002417061514453371700344750ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB__flow_graph_join_impl_H #define __TBB__flow_graph_join_impl_H #ifndef __TBB_flow_graph_H #error Do not #include this internal file directly; use public TBB headers instead. 
#endif // included into namespace tbb::detail::d1 struct forwarding_base : no_assign { forwarding_base(graph &g) : graph_ref(g) {} virtual ~forwarding_base() {} graph& graph_ref; }; struct queueing_forwarding_base : forwarding_base { using forwarding_base::forwarding_base; // decrement_port_count may create a forwarding task. If we cannot handle the task // ourselves, ask decrement_port_count to deal with it. virtual graph_task* decrement_port_count(bool handle_task) = 0; }; struct reserving_forwarding_base : forwarding_base { using forwarding_base::forwarding_base; // decrement_port_count may create a forwarding task. If we cannot handle the task // ourselves, ask decrement_port_count to deal with it. virtual graph_task* decrement_port_count() = 0; virtual void increment_port_count() = 0; }; // specialization that lets us keep a copy of the current_key for building results. // KeyType can be a reference type. template struct matching_forwarding_base : public forwarding_base { typedef typename std::decay::type current_key_type; matching_forwarding_base(graph &g) : forwarding_base(g) { } virtual graph_task* increment_key_count(current_key_type const & /*t*/) = 0; current_key_type current_key; // so ports can refer to FE's desired items }; template< int N > struct join_helper { template< typename TupleType, typename PortType > static inline void set_join_node_pointer(TupleType &my_input, PortType *port) { std::get( my_input ).set_join_node_pointer(port); join_helper::set_join_node_pointer( my_input, port ); } template< typename TupleType > static inline void consume_reservations( TupleType &my_input ) { std::get( my_input ).consume(); join_helper::consume_reservations( my_input ); } template< typename TupleType > static inline void release_my_reservation( TupleType &my_input ) { std::get( my_input ).release(); } template static inline void release_reservations( TupleType &my_input) { join_helper::release_reservations(my_input); release_my_reservation(my_input); } 
template< typename InputTuple, typename OutputTuple > static inline bool reserve( InputTuple &my_input, OutputTuple &out) { if ( !std::get( my_input ).reserve( std::get( out ) ) ) return false; if ( !join_helper::reserve( my_input, out ) ) { release_my_reservation( my_input ); return false; } return true; } template static inline bool get_my_item( InputTuple &my_input, OutputTuple &out) { bool res = std::get(my_input).get_item(std::get(out) ); // may fail return join_helper::get_my_item(my_input, out) && res; // do get on other inputs before returning } template static inline bool get_items(InputTuple &my_input, OutputTuple &out) { return get_my_item(my_input, out); } template static inline void reset_my_port(InputTuple &my_input) { join_helper::reset_my_port(my_input); std::get(my_input).reset_port(); } template static inline void reset_ports(InputTuple& my_input) { reset_my_port(my_input); } template static inline void set_key_functors(InputTuple &my_input, KeyFuncTuple &my_key_funcs) { std::get(my_input).set_my_key_func(std::get(my_key_funcs)); std::get(my_key_funcs) = nullptr; join_helper::set_key_functors(my_input, my_key_funcs); } template< typename KeyFuncTuple> static inline void copy_key_functors(KeyFuncTuple &my_inputs, KeyFuncTuple &other_inputs) { __TBB_ASSERT( std::get(other_inputs).get_my_key_func(), "key matching join node should not be instantiated without functors." 
); std::get(my_inputs).set_my_key_func(std::get(other_inputs).get_my_key_func()->clone()); join_helper::copy_key_functors(my_inputs, other_inputs); } template static inline void reset_inputs(InputTuple &my_input, reset_flags f) { join_helper::reset_inputs(my_input, f); std::get(my_input).reset_receiver(f); } }; // join_helper template< > struct join_helper<1> { template< typename TupleType, typename PortType > static inline void set_join_node_pointer(TupleType &my_input, PortType *port) { std::get<0>( my_input ).set_join_node_pointer(port); } template< typename TupleType > static inline void consume_reservations( TupleType &my_input ) { std::get<0>( my_input ).consume(); } template< typename TupleType > static inline void release_my_reservation( TupleType &my_input ) { std::get<0>( my_input ).release(); } template static inline void release_reservations( TupleType &my_input) { release_my_reservation(my_input); } template< typename InputTuple, typename OutputTuple > static inline bool reserve( InputTuple &my_input, OutputTuple &out) { return std::get<0>( my_input ).reserve( std::get<0>( out ) ); } template static inline bool get_my_item( InputTuple &my_input, OutputTuple &out) { return std::get<0>(my_input).get_item(std::get<0>(out)); } template static inline bool get_items(InputTuple &my_input, OutputTuple &out) { return get_my_item(my_input, out); } template static inline void reset_my_port(InputTuple &my_input) { std::get<0>(my_input).reset_port(); } template static inline void reset_ports(InputTuple& my_input) { reset_my_port(my_input); } template static inline void set_key_functors(InputTuple &my_input, KeyFuncTuple &my_key_funcs) { std::get<0>(my_input).set_my_key_func(std::get<0>(my_key_funcs)); std::get<0>(my_key_funcs) = nullptr; } template< typename KeyFuncTuple> static inline void copy_key_functors(KeyFuncTuple &my_inputs, KeyFuncTuple &other_inputs) { __TBB_ASSERT( std::get<0>(other_inputs).get_my_key_func(), "key matching join node should not be 
instantiated without functors." ); std::get<0>(my_inputs).set_my_key_func(std::get<0>(other_inputs).get_my_key_func()->clone()); } template static inline void reset_inputs(InputTuple &my_input, reset_flags f) { std::get<0>(my_input).reset_receiver(f); } }; // join_helper<1> //! The two-phase join port template< typename T > class reserving_port : public receiver { public: typedef T input_type; typedef typename receiver::predecessor_type predecessor_type; private: // ----------- Aggregator ------------ enum op_type { reg_pred, rem_pred, res_item, rel_res, con_res }; typedef reserving_port class_type; class reserving_port_operation : public aggregated_operation { public: char type; union { T *my_arg; predecessor_type *my_pred; }; reserving_port_operation(const T& e, op_type t) : type(char(t)), my_arg(const_cast(&e)) {} reserving_port_operation(const predecessor_type &s, op_type t) : type(char(t)), my_pred(const_cast(&s)) {} reserving_port_operation(op_type t) : type(char(t)) {} }; typedef aggregating_functor handler_type; friend class aggregating_functor; aggregator my_aggregator; void handle_operations(reserving_port_operation* op_list) { reserving_port_operation *current; bool was_missing_predecessors = false; while(op_list) { current = op_list; op_list = op_list->next; switch(current->type) { case reg_pred: was_missing_predecessors = my_predecessors.empty(); my_predecessors.add(*(current->my_pred)); if ( was_missing_predecessors ) { (void) my_join->decrement_port_count(); // may try to forward } current->status.store( SUCCEEDED, std::memory_order_release); break; case rem_pred: if ( !my_predecessors.empty() ) { my_predecessors.remove(*(current->my_pred)); if ( my_predecessors.empty() ) // was the last predecessor my_join->increment_port_count(); } // TODO: consider returning failure if there were no predecessors to remove current->status.store( SUCCEEDED, std::memory_order_release ); break; case res_item: if ( reserved ) { current->status.store( FAILED, 
std::memory_order_release); } else if ( my_predecessors.try_reserve( *(current->my_arg) ) ) { reserved = true; current->status.store( SUCCEEDED, std::memory_order_release); } else { if ( my_predecessors.empty() ) { my_join->increment_port_count(); } current->status.store( FAILED, std::memory_order_release); } break; case rel_res: reserved = false; my_predecessors.try_release( ); current->status.store( SUCCEEDED, std::memory_order_release); break; case con_res: reserved = false; my_predecessors.try_consume( ); current->status.store( SUCCEEDED, std::memory_order_release); break; } } } protected: template< typename R, typename B > friend class run_and_put_task; template friend class broadcast_cache; template friend class round_robin_cache; graph_task* try_put_task( const T & ) override { return nullptr; } graph& graph_reference() const override { return my_join->graph_ref; } public: //! Constructor reserving_port() : my_join(nullptr), my_predecessors(this), reserved(false) { my_aggregator.initialize_handler(handler_type(this)); } // copy constructor reserving_port(const reserving_port& /* other */) = delete; void set_join_node_pointer(reserving_forwarding_base *join) { my_join = join; } //! Add a predecessor bool register_predecessor( predecessor_type &src ) override { reserving_port_operation op_data(src, reg_pred); my_aggregator.execute(&op_data); return op_data.status == SUCCEEDED; } //! Remove a predecessor bool remove_predecessor( predecessor_type &src ) override { reserving_port_operation op_data(src, rem_pred); my_aggregator.execute(&op_data); return op_data.status == SUCCEEDED; } //! Reserve an item from the port bool reserve( T &v ) { reserving_port_operation op_data(v, res_item); my_aggregator.execute(&op_data); return op_data.status == SUCCEEDED; } //! Release the port void release( ) { reserving_port_operation op_data(rel_res); my_aggregator.execute(&op_data); } //! 
Complete use of the port void consume( ) { reserving_port_operation op_data(con_res); my_aggregator.execute(&op_data); } void reset_receiver( reset_flags f) { if(f & rf_clear_edges) my_predecessors.clear(); else my_predecessors.reset(); reserved = false; __TBB_ASSERT(!(f&rf_clear_edges) || my_predecessors.empty(), "port edges not removed"); } private: #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET friend class get_graph_helper; #endif reserving_forwarding_base *my_join; reservable_predecessor_cache< T, null_mutex > my_predecessors; bool reserved; }; // reserving_port //! queueing join_port template class queueing_port : public receiver, public item_buffer { public: typedef T input_type; typedef typename receiver::predecessor_type predecessor_type; typedef queueing_port class_type; // ----------- Aggregator ------------ private: enum op_type { get__item, res_port, try__put_task }; class queueing_port_operation : public aggregated_operation { public: char type; T my_val; T* my_arg; graph_task* bypass_t; // constructor for value parameter queueing_port_operation(const T& e, op_type t) : type(char(t)), my_val(e) , bypass_t(nullptr) {} // constructor for pointer parameter queueing_port_operation(const T* p, op_type t) : type(char(t)), my_arg(const_cast(p)) , bypass_t(nullptr) {} // constructor with no parameter queueing_port_operation(op_type t) : type(char(t)) , bypass_t(nullptr) {} }; typedef aggregating_functor handler_type; friend class aggregating_functor; aggregator my_aggregator; void handle_operations(queueing_port_operation* op_list) { queueing_port_operation *current; bool was_empty; while(op_list) { current = op_list; op_list = op_list->next; switch(current->type) { case try__put_task: { graph_task* rtask = nullptr; was_empty = this->buffer_empty(); this->push_back(current->my_val); if (was_empty) rtask = my_join->decrement_port_count(false); else rtask = SUCCESSFULLY_ENQUEUED; current->bypass_t = rtask; current->status.store( SUCCEEDED, std::memory_order_release); } 
break; case get__item: if(!this->buffer_empty()) { *(current->my_arg) = this->front(); current->status.store( SUCCEEDED, std::memory_order_release); } else { current->status.store( FAILED, std::memory_order_release); } break; case res_port: __TBB_ASSERT(this->my_item_valid(this->my_head), "No item to reset"); this->destroy_front(); if(this->my_item_valid(this->my_head)) { (void)my_join->decrement_port_count(true); } current->status.store( SUCCEEDED, std::memory_order_release); break; } } } // ------------ End Aggregator --------------- protected: template< typename R, typename B > friend class run_and_put_task; template friend class broadcast_cache; template friend class round_robin_cache; graph_task* try_put_task(const T &v) override { queueing_port_operation op_data(v, try__put_task); my_aggregator.execute(&op_data); __TBB_ASSERT(op_data.status == SUCCEEDED || !op_data.bypass_t, "inconsistent return from aggregator"); if(!op_data.bypass_t) return SUCCESSFULLY_ENQUEUED; return op_data.bypass_t; } graph& graph_reference() const override { return my_join->graph_ref; } public: //! Constructor queueing_port() : item_buffer() { my_join = nullptr; my_aggregator.initialize_handler(handler_type(this)); } //! copy constructor queueing_port(const queueing_port& /* other */) = delete; //! record parent for tallying available items void set_join_node_pointer(queueing_forwarding_base *join) { my_join = join; } bool get_item( T &v ) { queueing_port_operation op_data(&v, get__item); my_aggregator.execute(&op_data); return op_data.status == SUCCEEDED; } // reset_port is called when item is accepted by successor, but // is initiated by join_node. 
void reset_port() { queueing_port_operation op_data(res_port); my_aggregator.execute(&op_data); return; } void reset_receiver(reset_flags) { item_buffer::reset(); } private: #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET friend class get_graph_helper; #endif queueing_forwarding_base *my_join; }; // queueing_port #include "_flow_graph_tagged_buffer_impl.h" template struct count_element { K my_key; size_t my_value; }; // method to access the key in the counting table // the ref has already been removed from K template< typename K > struct key_to_count_functor { typedef count_element table_item_type; const K& operator()(const table_item_type& v) { return v.my_key; } }; // the ports can have only one template parameter. We wrap the types needed in // a traits type template< class TraitsType > class key_matching_port : public receiver, public hash_buffer< typename TraitsType::K, typename TraitsType::T, typename TraitsType::TtoK, typename TraitsType::KHash > { public: typedef TraitsType traits; typedef key_matching_port class_type; typedef typename TraitsType::T input_type; typedef typename TraitsType::K key_type; typedef typename std::decay::type noref_key_type; typedef typename receiver::predecessor_type predecessor_type; typedef typename TraitsType::TtoK type_to_key_func_type; typedef typename TraitsType::KHash hash_compare_type; typedef hash_buffer< key_type, input_type, type_to_key_func_type, hash_compare_type > buffer_type; private: // ----------- Aggregator ------------ private: enum op_type { try__put, get__item, res_port }; class key_matching_port_operation : public aggregated_operation { public: char type; input_type my_val; input_type *my_arg; // constructor for value parameter key_matching_port_operation(const input_type& e, op_type t) : type(char(t)), my_val(e) {} // constructor for pointer parameter key_matching_port_operation(const input_type* p, op_type t) : type(char(t)), my_arg(const_cast(p)) {} // constructor with no parameter 
key_matching_port_operation(op_type t) : type(char(t)) {} }; typedef aggregating_functor handler_type; friend class aggregating_functor; aggregator my_aggregator; void handle_operations(key_matching_port_operation* op_list) { key_matching_port_operation *current; while(op_list) { current = op_list; op_list = op_list->next; switch(current->type) { case try__put: { bool was_inserted = this->insert_with_key(current->my_val); // return failure if a duplicate insertion occurs current->status.store( was_inserted ? SUCCEEDED : FAILED, std::memory_order_release); } break; case get__item: // use current_key from FE for item if(!this->find_with_key(my_join->current_key, *(current->my_arg))) { __TBB_ASSERT(false, "Failed to find item corresponding to current_key."); } current->status.store( SUCCEEDED, std::memory_order_release); break; case res_port: // use current_key from FE for item this->delete_with_key(my_join->current_key); current->status.store( SUCCEEDED, std::memory_order_release); break; } } } // ------------ End Aggregator --------------- protected: template< typename R, typename B > friend class run_and_put_task; template friend class broadcast_cache; template friend class round_robin_cache; graph_task* try_put_task(const input_type& v) override { key_matching_port_operation op_data(v, try__put); graph_task* rtask = nullptr; my_aggregator.execute(&op_data); if(op_data.status == SUCCEEDED) { rtask = my_join->increment_key_count((*(this->get_key_func()))(v)); // may spawn // rtask has to reflect the return status of the try_put if(!rtask) rtask = SUCCESSFULLY_ENQUEUED; } return rtask; } graph& graph_reference() const override { return my_join->graph_ref; } public: key_matching_port() : receiver(), buffer_type() { my_join = nullptr; my_aggregator.initialize_handler(handler_type(this)); } // copy constructor key_matching_port(const key_matching_port& /*other*/) = delete; #if __INTEL_COMPILER <= 2021 // Suppress superfluous diagnostic about virtual keyword absence in a 
destructor of an inherited // class while the parent class has the virtual keyword for the destrocutor. virtual #endif ~key_matching_port() { } void set_join_node_pointer(forwarding_base *join) { my_join = dynamic_cast*>(join); } void set_my_key_func(type_to_key_func_type *f) { this->set_key_func(f); } type_to_key_func_type* get_my_key_func() { return this->get_key_func(); } bool get_item( input_type &v ) { // aggregator uses current_key from FE for Key key_matching_port_operation op_data(&v, get__item); my_aggregator.execute(&op_data); return op_data.status == SUCCEEDED; } // reset_port is called when item is accepted by successor, but // is initiated by join_node. void reset_port() { key_matching_port_operation op_data(res_port); my_aggregator.execute(&op_data); return; } void reset_receiver(reset_flags ) { buffer_type::reset(); } private: // my_join forwarding base used to count number of inputs that // received key. matching_forwarding_base *my_join; }; // key_matching_port using namespace graph_policy_namespace; template class join_node_base; //! 
join_node_FE : implements input port policy template class join_node_FE; template class join_node_FE : public reserving_forwarding_base { public: static const int N = std::tuple_size::value; typedef OutputTuple output_type; typedef InputTuple input_type; typedef join_node_base base_node_type; // for forwarding join_node_FE(graph &g) : reserving_forwarding_base(g), my_node(nullptr) { ports_with_no_inputs = N; join_helper::set_join_node_pointer(my_inputs, this); } join_node_FE(const join_node_FE& other) : reserving_forwarding_base((other.reserving_forwarding_base::graph_ref)), my_node(nullptr) { ports_with_no_inputs = N; join_helper::set_join_node_pointer(my_inputs, this); } void set_my_node(base_node_type *new_my_node) { my_node = new_my_node; } void increment_port_count() override { ++ports_with_no_inputs; } // if all input_ports have predecessors, spawn forward to try and consume tuples graph_task* decrement_port_count() override { if(ports_with_no_inputs.fetch_sub(1) == 1) { if(is_graph_active(this->graph_ref)) { small_object_allocator allocator{}; typedef forward_task_bypass task_type; graph_task* t = allocator.new_object(graph_ref, allocator, *my_node); graph_ref.reserve_wait(); spawn_in_graph_arena(this->graph_ref, *t); } } return nullptr; } input_type &input_ports() { return my_inputs; } protected: void reset( reset_flags f) { // called outside of parallel contexts ports_with_no_inputs = N; join_helper::reset_inputs(my_inputs, f); } // all methods on input ports should be called under mutual exclusion from join_node_base. 
bool tuple_build_may_succeed() { return !ports_with_no_inputs; } bool try_to_make_tuple(output_type &out) { if(ports_with_no_inputs) return false; return join_helper::reserve(my_inputs, out); } void tuple_accepted() { join_helper::consume_reservations(my_inputs); } void tuple_rejected() { join_helper::release_reservations(my_inputs); } input_type my_inputs; base_node_type *my_node; std::atomic ports_with_no_inputs; }; // join_node_FE template class join_node_FE : public queueing_forwarding_base { public: static const int N = std::tuple_size::value; typedef OutputTuple output_type; typedef InputTuple input_type; typedef join_node_base base_node_type; // for forwarding join_node_FE(graph &g) : queueing_forwarding_base(g), my_node(nullptr) { ports_with_no_items = N; join_helper::set_join_node_pointer(my_inputs, this); } join_node_FE(const join_node_FE& other) : queueing_forwarding_base((other.queueing_forwarding_base::graph_ref)), my_node(nullptr) { ports_with_no_items = N; join_helper::set_join_node_pointer(my_inputs, this); } // needed for forwarding void set_my_node(base_node_type *new_my_node) { my_node = new_my_node; } void reset_port_count() { ports_with_no_items = N; } // if all input_ports have items, spawn forward to try and consume tuples graph_task* decrement_port_count(bool handle_task) override { if(ports_with_no_items.fetch_sub(1) == 1) { if(is_graph_active(this->graph_ref)) { small_object_allocator allocator{}; typedef forward_task_bypass task_type; graph_task* t = allocator.new_object(graph_ref, allocator, *my_node); graph_ref.reserve_wait(); if( !handle_task ) return t; spawn_in_graph_arena(this->graph_ref, *t); } } return nullptr; } input_type &input_ports() { return my_inputs; } protected: void reset( reset_flags f) { reset_port_count(); join_helper::reset_inputs(my_inputs, f ); } // all methods on input ports should be called under mutual exclusion from join_node_base. 
bool tuple_build_may_succeed() { return !ports_with_no_items; } bool try_to_make_tuple(output_type &out) { if(ports_with_no_items) return false; return join_helper::get_items(my_inputs, out); } void tuple_accepted() { reset_port_count(); join_helper::reset_ports(my_inputs); } void tuple_rejected() { // nothing to do. } input_type my_inputs; base_node_type *my_node; std::atomic ports_with_no_items; }; // join_node_FE // key_matching join front-end. template class join_node_FE, InputTuple, OutputTuple> : public matching_forwarding_base, // buffer of key value counts public hash_buffer< // typedefed below to key_to_count_buffer_type typename std::decay::type&, // force ref type on K count_element::type>, type_to_key_function_body< count_element::type>, typename std::decay::type& >, KHash >, // buffer of output items public item_buffer { public: static const int N = std::tuple_size::value; typedef OutputTuple output_type; typedef InputTuple input_type; typedef K key_type; typedef typename std::decay::type unref_key_type; typedef KHash key_hash_compare; // must use K without ref. typedef count_element count_element_type; // method that lets us refer to the key of this type. typedef key_to_count_functor key_to_count_func; typedef type_to_key_function_body< count_element_type, unref_key_type&> TtoK_function_body_type; typedef type_to_key_function_body_leaf TtoK_function_body_leaf_type; // this is the type of the special table that keeps track of the number of discrete // elements corresponding to each key that we've seen. typedef hash_buffer< unref_key_type&, count_element_type, TtoK_function_body_type, key_hash_compare > key_to_count_buffer_type; typedef item_buffer output_buffer_type; typedef join_node_base, InputTuple, OutputTuple> base_node_type; // for forwarding typedef matching_forwarding_base forwarding_base_type; // ----------- Aggregator ------------ // the aggregator is only needed to serialize the access to the hash table. 
// and the output_buffer_type base class private: enum op_type { res_count, inc_count, may_succeed, try_make }; typedef join_node_FE, InputTuple, OutputTuple> class_type; class key_matching_FE_operation : public aggregated_operation { public: char type; unref_key_type my_val; output_type* my_output; graph_task* bypass_t; // constructor for value parameter key_matching_FE_operation(const unref_key_type& e , op_type t) : type(char(t)), my_val(e), my_output(nullptr), bypass_t(nullptr) {} key_matching_FE_operation(output_type *p, op_type t) : type(char(t)), my_output(p), bypass_t(nullptr) {} // constructor with no parameter key_matching_FE_operation(op_type t) : type(char(t)), my_output(nullptr), bypass_t(nullptr) {} }; typedef aggregating_functor handler_type; friend class aggregating_functor; aggregator my_aggregator; // called from aggregator, so serialized // returns a task pointer if the a task would have been enqueued but we asked that // it be returned. Otherwise returns nullptr. graph_task* fill_output_buffer(unref_key_type &t) { output_type l_out; graph_task* rtask = nullptr; bool do_fwd = this->buffer_empty() && is_graph_active(this->graph_ref); this->current_key = t; this->delete_with_key(this->current_key); // remove the key if(join_helper::get_items(my_inputs, l_out)) { // <== call back this->push_back(l_out); if(do_fwd) { // we enqueue if receiving an item from predecessor, not if successor asks for item small_object_allocator allocator{}; typedef forward_task_bypass task_type; rtask = allocator.new_object(this->graph_ref, allocator, *my_node); this->graph_ref.reserve_wait(); do_fwd = false; } // retire the input values join_helper::reset_ports(my_inputs); // <== call back } else { __TBB_ASSERT(false, "should have had something to push"); } return rtask; } void handle_operations(key_matching_FE_operation* op_list) { key_matching_FE_operation *current; while(op_list) { current = op_list; op_list = op_list->next; switch(current->type) { case res_count: // 
called from BE { this->destroy_front(); current->status.store( SUCCEEDED, std::memory_order_release); } break; case inc_count: { // called from input ports count_element_type *p = 0; unref_key_type &t = current->my_val; if(!(this->find_ref_with_key(t,p))) { count_element_type ev; ev.my_key = t; ev.my_value = 0; this->insert_with_key(ev); bool found = this->find_ref_with_key(t, p); __TBB_ASSERT_EX(found, "should find key after inserting it"); } if(++(p->my_value) == size_t(N)) { current->bypass_t = fill_output_buffer(t); } } current->status.store( SUCCEEDED, std::memory_order_release); break; case may_succeed: // called from BE current->status.store( this->buffer_empty() ? FAILED : SUCCEEDED, std::memory_order_release); break; case try_make: // called from BE if(this->buffer_empty()) { current->status.store( FAILED, std::memory_order_release); } else { *(current->my_output) = this->front(); current->status.store( SUCCEEDED, std::memory_order_release); } break; } } } // ------------ End Aggregator --------------- public: template join_node_FE(graph &g, FunctionTuple &TtoK_funcs) : forwarding_base_type(g), my_node(nullptr) { join_helper::set_join_node_pointer(my_inputs, this); join_helper::set_key_functors(my_inputs, TtoK_funcs); my_aggregator.initialize_handler(handler_type(this)); TtoK_function_body_type *cfb = new TtoK_function_body_leaf_type(key_to_count_func()); this->set_key_func(cfb); } join_node_FE(const join_node_FE& other) : forwarding_base_type((other.forwarding_base_type::graph_ref)), key_to_count_buffer_type(), output_buffer_type() { my_node = nullptr; join_helper::set_join_node_pointer(my_inputs, this); join_helper::copy_key_functors(my_inputs, const_cast(other.my_inputs)); my_aggregator.initialize_handler(handler_type(this)); TtoK_function_body_type *cfb = new TtoK_function_body_leaf_type(key_to_count_func()); this->set_key_func(cfb); } // needed for forwarding void set_my_node(base_node_type *new_my_node) { my_node = new_my_node; } void 
reset_port_count() { // called from BE key_matching_FE_operation op_data(res_count); my_aggregator.execute(&op_data); return; } // if all input_ports have items, spawn forward to try and consume tuples // return a task if we are asked and did create one. graph_task *increment_key_count(unref_key_type const & t) override { // called from input_ports key_matching_FE_operation op_data(t, inc_count); my_aggregator.execute(&op_data); return op_data.bypass_t; } input_type &input_ports() { return my_inputs; } protected: void reset( reset_flags f ) { // called outside of parallel contexts join_helper::reset_inputs(my_inputs, f); key_to_count_buffer_type::reset(); output_buffer_type::reset(); } // all methods on input ports should be called under mutual exclusion from join_node_base. bool tuple_build_may_succeed() { // called from back-end key_matching_FE_operation op_data(may_succeed); my_aggregator.execute(&op_data); return op_data.status == SUCCEEDED; } // cannot lock while calling back to input_ports. current_key will only be set // and reset under the aggregator, so it will remain consistent. bool try_to_make_tuple(output_type &out) { key_matching_FE_operation op_data(&out,try_make); my_aggregator.execute(&op_data); return op_data.status == SUCCEEDED; } void tuple_accepted() { reset_port_count(); // reset current_key after ports reset. } void tuple_rejected() { // nothing to do. } input_type my_inputs; // input ports base_node_type *my_node; }; // join_node_FE, InputTuple, OutputTuple> //! 
join_node_base template class join_node_base : public graph_node, public join_node_FE, public sender { protected: using graph_node::my_graph; public: typedef OutputTuple output_type; typedef typename sender::successor_type successor_type; typedef join_node_FE input_ports_type; using input_ports_type::tuple_build_may_succeed; using input_ports_type::try_to_make_tuple; using input_ports_type::tuple_accepted; using input_ports_type::tuple_rejected; private: // ----------- Aggregator ------------ enum op_type { reg_succ, rem_succ, try__get, do_fwrd, do_fwrd_bypass }; typedef join_node_base class_type; class join_node_base_operation : public aggregated_operation { public: char type; union { output_type *my_arg; successor_type *my_succ; }; graph_task* bypass_t; join_node_base_operation(const output_type& e, op_type t) : type(char(t)), my_arg(const_cast(&e)), bypass_t(nullptr) {} join_node_base_operation(const successor_type &s, op_type t) : type(char(t)), my_succ(const_cast(&s)), bypass_t(nullptr) {} join_node_base_operation(op_type t) : type(char(t)), bypass_t(nullptr) {} }; typedef aggregating_functor handler_type; friend class aggregating_functor; bool forwarder_busy; aggregator my_aggregator; void handle_operations(join_node_base_operation* op_list) { join_node_base_operation *current; while(op_list) { current = op_list; op_list = op_list->next; switch(current->type) { case reg_succ: { my_successors.register_successor(*(current->my_succ)); if(tuple_build_may_succeed() && !forwarder_busy && is_graph_active(my_graph)) { small_object_allocator allocator{}; typedef forward_task_bypass< join_node_base > task_type; graph_task* t = allocator.new_object(my_graph, allocator, *this); my_graph.reserve_wait(); spawn_in_graph_arena(my_graph, *t); forwarder_busy = true; } current->status.store( SUCCEEDED, std::memory_order_release); } break; case rem_succ: my_successors.remove_successor(*(current->my_succ)); current->status.store( SUCCEEDED, std::memory_order_release); break; case 
try__get: if(tuple_build_may_succeed()) { if(try_to_make_tuple(*(current->my_arg))) { tuple_accepted(); current->status.store( SUCCEEDED, std::memory_order_release); } else current->status.store( FAILED, std::memory_order_release); } else current->status.store( FAILED, std::memory_order_release); break; case do_fwrd_bypass: { bool build_succeeded; graph_task *last_task = nullptr; output_type out; // forwarding must be exclusive, because try_to_make_tuple and tuple_accepted // are separate locked methods in the FE. We could conceivably fetch the front // of the FE queue, then be swapped out, have someone else consume the FE's // object, then come back, forward, and then try to remove it from the queue // again. Without reservation of the FE, the methods accessing it must be locked. // We could remember the keys of the objects we forwarded, and then remove // them from the input ports after forwarding is complete? if(tuple_build_may_succeed()) { // checks output queue of FE do { build_succeeded = try_to_make_tuple(out); // fetch front_end of queue if(build_succeeded) { graph_task *new_task = my_successors.try_put_task(out); last_task = combine_tasks(my_graph, last_task, new_task); if(new_task) { tuple_accepted(); } else { tuple_rejected(); build_succeeded = false; } } } while(build_succeeded); } current->bypass_t = last_task; current->status.store( SUCCEEDED, std::memory_order_release); forwarder_busy = false; } break; } } } // ---------- end aggregator ----------- public: join_node_base(graph &g) : graph_node(g), input_ports_type(g), forwarder_busy(false), my_successors(this) { input_ports_type::set_my_node(this); my_aggregator.initialize_handler(handler_type(this)); } join_node_base(const join_node_base& other) : graph_node(other.graph_node::my_graph), input_ports_type(other), sender(), forwarder_busy(false), my_successors(this) { input_ports_type::set_my_node(this); my_aggregator.initialize_handler(handler_type(this)); } template join_node_base(graph &g, 
FunctionTuple f) : graph_node(g), input_ports_type(g, f), forwarder_busy(false), my_successors(this) { input_ports_type::set_my_node(this); my_aggregator.initialize_handler(handler_type(this)); } bool register_successor(successor_type &r) override { join_node_base_operation op_data(r, reg_succ); my_aggregator.execute(&op_data); return op_data.status == SUCCEEDED; } bool remove_successor( successor_type &r) override { join_node_base_operation op_data(r, rem_succ); my_aggregator.execute(&op_data); return op_data.status == SUCCEEDED; } bool try_get( output_type &v) override { join_node_base_operation op_data(v, try__get); my_aggregator.execute(&op_data); return op_data.status == SUCCEEDED; } protected: void reset_node(reset_flags f) override { input_ports_type::reset(f); if(f & rf_clear_edges) my_successors.clear(); } private: broadcast_cache my_successors; friend class forward_task_bypass< join_node_base >; graph_task *forward_task() { join_node_base_operation op_data(do_fwrd_bypass); my_aggregator.execute(&op_data); return op_data.bypass_t; } }; // join_node_base // join base class type generator template class PT, typename OutputTuple, typename JP> struct join_base { typedef join_node_base::type, OutputTuple> type; }; template struct join_base > { typedef key_matching key_traits_type; typedef K key_type; typedef KHash key_hash_compare; typedef join_node_base< key_traits_type, // ports type typename wrap_key_tuple_elements::type, OutputTuple > type; }; //! unfolded_join_node : passes input_ports_type to join_node_base. We build the input port type // using tuple_element. The class PT is the port type (reserving_port, queueing_port, key_matching_port) // and should match the typename. 
template class PT, typename OutputTuple, typename JP> class unfolded_join_node : public join_base::type { public: typedef typename wrap_tuple_elements::type input_ports_type; typedef OutputTuple output_type; private: typedef join_node_base base_type; public: unfolded_join_node(graph &g) : base_type(g) {} unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} }; #if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING template struct key_from_message_body { K operator()(const T& t) const { return key_from_message(t); } }; // Adds const to reference type template struct key_from_message_body { const K& operator()(const T& t) const { return key_from_message(t); } }; #endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ // key_matching unfolded_join_node. This must be a separate specialization because the constructors // differ. template class unfolded_join_node<2,key_matching_port,OutputTuple,key_matching > : public join_base<2,key_matching_port,OutputTuple,key_matching >::type { typedef typename std::tuple_element<0, OutputTuple>::type T0; typedef typename std::tuple_element<1, OutputTuple>::type T1; public: typedef typename wrap_key_tuple_elements<2,key_matching_port,key_matching,OutputTuple>::type input_ports_type; typedef OutputTuple output_type; private: typedef join_node_base, input_ports_type, output_type > base_type; typedef type_to_key_function_body *f0_p; typedef type_to_key_function_body *f1_p; typedef std::tuple< f0_p, f1_p > func_initializer_type; public: #if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING unfolded_join_node(graph &g) : base_type(g, func_initializer_type( new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()) ) ) { } #endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ template unfolded_join_node(graph &g, Body0 body0, Body1 body1) : base_type(g, func_initializer_type( new type_to_key_function_body_leaf(body0), new type_to_key_function_body_leaf(body1) ) ) { 
static_assert(std::tuple_size::value == 2, "wrong number of body initializers"); } unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} }; template class unfolded_join_node<3,key_matching_port,OutputTuple,key_matching > : public join_base<3,key_matching_port,OutputTuple,key_matching >::type { typedef typename std::tuple_element<0, OutputTuple>::type T0; typedef typename std::tuple_element<1, OutputTuple>::type T1; typedef typename std::tuple_element<2, OutputTuple>::type T2; public: typedef typename wrap_key_tuple_elements<3,key_matching_port,key_matching,OutputTuple>::type input_ports_type; typedef OutputTuple output_type; private: typedef join_node_base, input_ports_type, output_type > base_type; typedef type_to_key_function_body *f0_p; typedef type_to_key_function_body *f1_p; typedef type_to_key_function_body *f2_p; typedef std::tuple< f0_p, f1_p, f2_p > func_initializer_type; public: #if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING unfolded_join_node(graph &g) : base_type(g, func_initializer_type( new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()) ) ) { } #endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ template unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2) : base_type(g, func_initializer_type( new type_to_key_function_body_leaf(body0), new type_to_key_function_body_leaf(body1), new type_to_key_function_body_leaf(body2) ) ) { static_assert(std::tuple_size::value == 3, "wrong number of body initializers"); } unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} }; template class unfolded_join_node<4,key_matching_port,OutputTuple,key_matching > : public join_base<4,key_matching_port,OutputTuple,key_matching >::type { typedef typename std::tuple_element<0, OutputTuple>::type T0; typedef typename std::tuple_element<1, OutputTuple>::type T1; typedef typename 
std::tuple_element<2, OutputTuple>::type T2; typedef typename std::tuple_element<3, OutputTuple>::type T3; public: typedef typename wrap_key_tuple_elements<4,key_matching_port,key_matching,OutputTuple>::type input_ports_type; typedef OutputTuple output_type; private: typedef join_node_base, input_ports_type, output_type > base_type; typedef type_to_key_function_body *f0_p; typedef type_to_key_function_body *f1_p; typedef type_to_key_function_body *f2_p; typedef type_to_key_function_body *f3_p; typedef std::tuple< f0_p, f1_p, f2_p, f3_p > func_initializer_type; public: #if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING unfolded_join_node(graph &g) : base_type(g, func_initializer_type( new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()) ) ) { } #endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ template unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3) : base_type(g, func_initializer_type( new type_to_key_function_body_leaf(body0), new type_to_key_function_body_leaf(body1), new type_to_key_function_body_leaf(body2), new type_to_key_function_body_leaf(body3) ) ) { static_assert(std::tuple_size::value == 4, "wrong number of body initializers"); } unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} }; template class unfolded_join_node<5,key_matching_port,OutputTuple,key_matching > : public join_base<5,key_matching_port,OutputTuple,key_matching >::type { typedef typename std::tuple_element<0, OutputTuple>::type T0; typedef typename std::tuple_element<1, OutputTuple>::type T1; typedef typename std::tuple_element<2, OutputTuple>::type T2; typedef typename std::tuple_element<3, OutputTuple>::type T3; typedef typename std::tuple_element<4, OutputTuple>::type T4; public: typedef typename 
wrap_key_tuple_elements<5,key_matching_port,key_matching,OutputTuple>::type input_ports_type; typedef OutputTuple output_type; private: typedef join_node_base , input_ports_type, output_type > base_type; typedef type_to_key_function_body *f0_p; typedef type_to_key_function_body *f1_p; typedef type_to_key_function_body *f2_p; typedef type_to_key_function_body *f3_p; typedef type_to_key_function_body *f4_p; typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p > func_initializer_type; public: #if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING unfolded_join_node(graph &g) : base_type(g, func_initializer_type( new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()) ) ) { } #endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ template unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4) : base_type(g, func_initializer_type( new type_to_key_function_body_leaf(body0), new type_to_key_function_body_leaf(body1), new type_to_key_function_body_leaf(body2), new type_to_key_function_body_leaf(body3), new type_to_key_function_body_leaf(body4) ) ) { static_assert(std::tuple_size::value == 5, "wrong number of body initializers"); } unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} }; #if __TBB_VARIADIC_MAX >= 6 template class unfolded_join_node<6,key_matching_port,OutputTuple,key_matching > : public join_base<6,key_matching_port,OutputTuple,key_matching >::type { typedef typename std::tuple_element<0, OutputTuple>::type T0; typedef typename std::tuple_element<1, OutputTuple>::type T1; typedef typename std::tuple_element<2, OutputTuple>::type T2; typedef typename std::tuple_element<3, OutputTuple>::type T3; typedef typename std::tuple_element<4, OutputTuple>::type T4; 
typedef typename std::tuple_element<5, OutputTuple>::type T5; public: typedef typename wrap_key_tuple_elements<6,key_matching_port,key_matching,OutputTuple>::type input_ports_type; typedef OutputTuple output_type; private: typedef join_node_base , input_ports_type, output_type > base_type; typedef type_to_key_function_body *f0_p; typedef type_to_key_function_body *f1_p; typedef type_to_key_function_body *f2_p; typedef type_to_key_function_body *f3_p; typedef type_to_key_function_body *f4_p; typedef type_to_key_function_body *f5_p; typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p > func_initializer_type; public: #if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING unfolded_join_node(graph &g) : base_type(g, func_initializer_type( new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()) ) ) { } #endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ template unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4, Body5 body5) : base_type(g, func_initializer_type( new type_to_key_function_body_leaf(body0), new type_to_key_function_body_leaf(body1), new type_to_key_function_body_leaf(body2), new type_to_key_function_body_leaf(body3), new type_to_key_function_body_leaf(body4), new type_to_key_function_body_leaf(body5) ) ) { static_assert(std::tuple_size::value == 6, "wrong number of body initializers"); } unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} }; #endif #if __TBB_VARIADIC_MAX >= 7 template class unfolded_join_node<7,key_matching_port,OutputTuple,key_matching > : public join_base<7,key_matching_port,OutputTuple,key_matching >::type { typedef typename std::tuple_element<0, 
OutputTuple>::type T0; typedef typename std::tuple_element<1, OutputTuple>::type T1; typedef typename std::tuple_element<2, OutputTuple>::type T2; typedef typename std::tuple_element<3, OutputTuple>::type T3; typedef typename std::tuple_element<4, OutputTuple>::type T4; typedef typename std::tuple_element<5, OutputTuple>::type T5; typedef typename std::tuple_element<6, OutputTuple>::type T6; public: typedef typename wrap_key_tuple_elements<7,key_matching_port,key_matching,OutputTuple>::type input_ports_type; typedef OutputTuple output_type; private: typedef join_node_base , input_ports_type, output_type > base_type; typedef type_to_key_function_body *f0_p; typedef type_to_key_function_body *f1_p; typedef type_to_key_function_body *f2_p; typedef type_to_key_function_body *f3_p; typedef type_to_key_function_body *f4_p; typedef type_to_key_function_body *f5_p; typedef type_to_key_function_body *f6_p; typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p, f6_p > func_initializer_type; public: #if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING unfolded_join_node(graph &g) : base_type(g, func_initializer_type( new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()) ) ) { } #endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ template unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4, Body5 body5, Body6 body6) : base_type(g, func_initializer_type( new type_to_key_function_body_leaf(body0), new type_to_key_function_body_leaf(body1), new type_to_key_function_body_leaf(body2), new type_to_key_function_body_leaf(body3), new 
type_to_key_function_body_leaf(body4), new type_to_key_function_body_leaf(body5), new type_to_key_function_body_leaf(body6) ) ) { static_assert(std::tuple_size::value == 7, "wrong number of body initializers"); } unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} }; #endif #if __TBB_VARIADIC_MAX >= 8 template class unfolded_join_node<8,key_matching_port,OutputTuple,key_matching > : public join_base<8,key_matching_port,OutputTuple,key_matching >::type { typedef typename std::tuple_element<0, OutputTuple>::type T0; typedef typename std::tuple_element<1, OutputTuple>::type T1; typedef typename std::tuple_element<2, OutputTuple>::type T2; typedef typename std::tuple_element<3, OutputTuple>::type T3; typedef typename std::tuple_element<4, OutputTuple>::type T4; typedef typename std::tuple_element<5, OutputTuple>::type T5; typedef typename std::tuple_element<6, OutputTuple>::type T6; typedef typename std::tuple_element<7, OutputTuple>::type T7; public: typedef typename wrap_key_tuple_elements<8,key_matching_port,key_matching,OutputTuple>::type input_ports_type; typedef OutputTuple output_type; private: typedef join_node_base , input_ports_type, output_type > base_type; typedef type_to_key_function_body *f0_p; typedef type_to_key_function_body *f1_p; typedef type_to_key_function_body *f2_p; typedef type_to_key_function_body *f3_p; typedef type_to_key_function_body *f4_p; typedef type_to_key_function_body *f5_p; typedef type_to_key_function_body *f6_p; typedef type_to_key_function_body *f7_p; typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p, f6_p, f7_p > func_initializer_type; public: #if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING unfolded_join_node(graph &g) : base_type(g, func_initializer_type( new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), 
new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()) ) ) { } #endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ template unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4, Body5 body5, Body6 body6, Body7 body7) : base_type(g, func_initializer_type( new type_to_key_function_body_leaf(body0), new type_to_key_function_body_leaf(body1), new type_to_key_function_body_leaf(body2), new type_to_key_function_body_leaf(body3), new type_to_key_function_body_leaf(body4), new type_to_key_function_body_leaf(body5), new type_to_key_function_body_leaf(body6), new type_to_key_function_body_leaf(body7) ) ) { static_assert(std::tuple_size::value == 8, "wrong number of body initializers"); } unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} }; #endif #if __TBB_VARIADIC_MAX >= 9 template class unfolded_join_node<9,key_matching_port,OutputTuple,key_matching > : public join_base<9,key_matching_port,OutputTuple,key_matching >::type { typedef typename std::tuple_element<0, OutputTuple>::type T0; typedef typename std::tuple_element<1, OutputTuple>::type T1; typedef typename std::tuple_element<2, OutputTuple>::type T2; typedef typename std::tuple_element<3, OutputTuple>::type T3; typedef typename std::tuple_element<4, OutputTuple>::type T4; typedef typename std::tuple_element<5, OutputTuple>::type T5; typedef typename std::tuple_element<6, OutputTuple>::type T6; typedef typename std::tuple_element<7, OutputTuple>::type T7; typedef typename std::tuple_element<8, OutputTuple>::type T8; public: typedef typename wrap_key_tuple_elements<9,key_matching_port,key_matching,OutputTuple>::type input_ports_type; typedef OutputTuple output_type; private: typedef join_node_base , input_ports_type, output_type > base_type; typedef 
type_to_key_function_body *f0_p; typedef type_to_key_function_body *f1_p; typedef type_to_key_function_body *f2_p; typedef type_to_key_function_body *f3_p; typedef type_to_key_function_body *f4_p; typedef type_to_key_function_body *f5_p; typedef type_to_key_function_body *f6_p; typedef type_to_key_function_body *f7_p; typedef type_to_key_function_body *f8_p; typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p, f6_p, f7_p, f8_p > func_initializer_type; public: #if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING unfolded_join_node(graph &g) : base_type(g, func_initializer_type( new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()) ) ) { } #endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ template unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4, Body5 body5, Body6 body6, Body7 body7, Body8 body8) : base_type(g, func_initializer_type( new type_to_key_function_body_leaf(body0), new type_to_key_function_body_leaf(body1), new type_to_key_function_body_leaf(body2), new type_to_key_function_body_leaf(body3), new type_to_key_function_body_leaf(body4), new type_to_key_function_body_leaf(body5), new type_to_key_function_body_leaf(body6), new type_to_key_function_body_leaf(body7), new type_to_key_function_body_leaf(body8) ) ) { static_assert(std::tuple_size::value == 9, "wrong number of body initializers"); } unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} }; #endif #if __TBB_VARIADIC_MAX >= 10 template 
class unfolded_join_node<10,key_matching_port,OutputTuple,key_matching > : public join_base<10,key_matching_port,OutputTuple,key_matching >::type { typedef typename std::tuple_element<0, OutputTuple>::type T0; typedef typename std::tuple_element<1, OutputTuple>::type T1; typedef typename std::tuple_element<2, OutputTuple>::type T2; typedef typename std::tuple_element<3, OutputTuple>::type T3; typedef typename std::tuple_element<4, OutputTuple>::type T4; typedef typename std::tuple_element<5, OutputTuple>::type T5; typedef typename std::tuple_element<6, OutputTuple>::type T6; typedef typename std::tuple_element<7, OutputTuple>::type T7; typedef typename std::tuple_element<8, OutputTuple>::type T8; typedef typename std::tuple_element<9, OutputTuple>::type T9; public: typedef typename wrap_key_tuple_elements<10,key_matching_port,key_matching,OutputTuple>::type input_ports_type; typedef OutputTuple output_type; private: typedef join_node_base , input_ports_type, output_type > base_type; typedef type_to_key_function_body *f0_p; typedef type_to_key_function_body *f1_p; typedef type_to_key_function_body *f2_p; typedef type_to_key_function_body *f3_p; typedef type_to_key_function_body *f4_p; typedef type_to_key_function_body *f5_p; typedef type_to_key_function_body *f6_p; typedef type_to_key_function_body *f7_p; typedef type_to_key_function_body *f8_p; typedef type_to_key_function_body *f9_p; typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p, f6_p, f7_p, f8_p, f9_p > func_initializer_type; public: #if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING unfolded_join_node(graph &g) : base_type(g, func_initializer_type( new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf 
>(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()), new type_to_key_function_body_leaf >(key_from_message_body()) ) ) { } #endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ template unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4, Body5 body5, Body6 body6, Body7 body7, Body8 body8, Body9 body9) : base_type(g, func_initializer_type( new type_to_key_function_body_leaf(body0), new type_to_key_function_body_leaf(body1), new type_to_key_function_body_leaf(body2), new type_to_key_function_body_leaf(body3), new type_to_key_function_body_leaf(body4), new type_to_key_function_body_leaf(body5), new type_to_key_function_body_leaf(body6), new type_to_key_function_body_leaf(body7), new type_to_key_function_body_leaf(body8), new type_to_key_function_body_leaf(body9) ) ) { static_assert(std::tuple_size::value == 10, "wrong number of body initializers"); } unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} }; #endif //! templated function to refer to input ports of the join node template typename std::tuple_element::type &input_port(JNT &jn) { return std::get(jn.input_ports()); } #endif // __TBB__flow_graph_join_impl_H _flow_graph_node_impl.h000066400000000000000000000662771514453371700344730ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB__flow_graph_node_impl_H #define __TBB__flow_graph_node_impl_H #ifndef __TBB_flow_graph_H #error Do not #include this internal file directly; use public TBB headers instead. #endif #include "_flow_graph_item_buffer_impl.h" template< typename T, typename A > class function_input_queue : public item_buffer { public: bool empty() const { return this->buffer_empty(); } const T& front() const { return this->item_buffer::front(); } void pop() { this->destroy_front(); } bool push( T& t ) { return this->push_back( t ); } }; //! Input and scheduling for a function node that takes a type Input as input // The only up-ref is apply_body_impl, which should implement the function // call and any handling of the result. template< typename Input, typename Policy, typename A, typename ImplType > class function_input_base : public receiver, no_assign { enum op_type {reg_pred, rem_pred, try_fwd, tryput_bypass, app_body_bypass, occupy_concurrency }; typedef function_input_base class_type; public: //! The input type of this receiver typedef Input input_type; typedef typename receiver::predecessor_type predecessor_type; typedef predecessor_cache predecessor_cache_type; typedef function_input_queue input_queue_type; typedef typename allocator_traits::template rebind_alloc allocator_type; static_assert(!has_policy::value || !has_policy::value, ""); //! 
Constructor for function_input_base function_input_base( graph &g, size_t max_concurrency, node_priority_t a_priority, bool is_no_throw ) : my_graph_ref(g), my_max_concurrency(max_concurrency) , my_concurrency(0), my_priority(a_priority), my_is_no_throw(is_no_throw) , my_queue(!has_policy::value ? new input_queue_type() : NULL) , my_predecessors(this) , forwarder_busy(false) { my_aggregator.initialize_handler(handler_type(this)); } //! Copy constructor function_input_base( const function_input_base& src ) : function_input_base(src.my_graph_ref, src.my_max_concurrency, src.my_priority, src.my_is_no_throw) {} //! Destructor // The queue is allocated by the constructor for {multi}function_node. // TODO: pass the graph_buffer_policy to the base so it can allocate the queue instead. // This would be an interface-breaking change. virtual ~function_input_base() { delete my_queue; my_queue = nullptr; } graph_task* try_put_task( const input_type& t) override { if ( my_is_no_throw ) return try_put_task_impl(t, has_policy()); else return try_put_task_impl(t, std::false_type()); } //! Adds src to the list of cached predecessors. bool register_predecessor( predecessor_type &src ) override { operation_type op_data(reg_pred); op_data.r = &src; my_aggregator.execute(&op_data); return true; } //! Removes src from the list of cached predecessors. 
bool remove_predecessor( predecessor_type &src ) override { operation_type op_data(rem_pred); op_data.r = &src; my_aggregator.execute(&op_data); return true; } protected: void reset_function_input_base( reset_flags f) { my_concurrency = 0; if(my_queue) { my_queue->reset(); } reset_receiver(f); forwarder_busy = false; } graph& my_graph_ref; const size_t my_max_concurrency; size_t my_concurrency; node_priority_t my_priority; const bool my_is_no_throw; input_queue_type *my_queue; predecessor_cache my_predecessors; void reset_receiver( reset_flags f) { if( f & rf_clear_edges) my_predecessors.clear(); else my_predecessors.reset(); __TBB_ASSERT(!(f & rf_clear_edges) || my_predecessors.empty(), "function_input_base reset failed"); } graph& graph_reference() const override { return my_graph_ref; } graph_task* try_get_postponed_task(const input_type& i) { operation_type op_data(i, app_body_bypass); // tries to pop an item or get_item my_aggregator.execute(&op_data); return op_data.bypass_t; } private: friend class apply_body_task_bypass< class_type, input_type >; friend class forward_task_bypass< class_type >; class operation_type : public aggregated_operation< operation_type > { public: char type; union { input_type *elem; predecessor_type *r; }; graph_task* bypass_t; operation_type(const input_type& e, op_type t) : type(char(t)), elem(const_cast(&e)) {} operation_type(op_type t) : type(char(t)), r(NULL) {} }; bool forwarder_busy; typedef aggregating_functor handler_type; friend class aggregating_functor; aggregator< handler_type, operation_type > my_aggregator; graph_task* perform_queued_requests() { graph_task* new_task = NULL; if(my_queue) { if(!my_queue->empty()) { ++my_concurrency; new_task = create_body_task(my_queue->front()); my_queue->pop(); } } else { input_type i; if(my_predecessors.get_item(i)) { ++my_concurrency; new_task = create_body_task(i); } } return new_task; } void handle_operations(operation_type *op_list) { operation_type* tmp; while (op_list) { tmp = 
op_list; op_list = op_list->next; switch (tmp->type) { case reg_pred: my_predecessors.add(*(tmp->r)); tmp->status.store(SUCCEEDED, std::memory_order_release); if (!forwarder_busy) { forwarder_busy = true; spawn_forward_task(); } break; case rem_pred: my_predecessors.remove(*(tmp->r)); tmp->status.store(SUCCEEDED, std::memory_order_release); break; case app_body_bypass: { tmp->bypass_t = NULL; __TBB_ASSERT(my_max_concurrency != 0, NULL); --my_concurrency; if(my_concurrencybypass_t = perform_queued_requests(); tmp->status.store(SUCCEEDED, std::memory_order_release); } break; case tryput_bypass: internal_try_put_task(tmp); break; case try_fwd: internal_forward(tmp); break; case occupy_concurrency: if (my_concurrency < my_max_concurrency) { ++my_concurrency; tmp->status.store(SUCCEEDED, std::memory_order_release); } else { tmp->status.store(FAILED, std::memory_order_release); } break; } } } //! Put to the node, but return the task instead of enqueueing it void internal_try_put_task(operation_type *op) { __TBB_ASSERT(my_max_concurrency != 0, NULL); if (my_concurrency < my_max_concurrency) { ++my_concurrency; graph_task * new_task = create_body_task(*(op->elem)); op->bypass_t = new_task; op->status.store(SUCCEEDED, std::memory_order_release); } else if ( my_queue && my_queue->push(*(op->elem)) ) { op->bypass_t = SUCCESSFULLY_ENQUEUED; op->status.store(SUCCEEDED, std::memory_order_release); } else { op->bypass_t = NULL; op->status.store(FAILED, std::memory_order_release); } } //! 
Creates tasks for postponed messages if available and if concurrency allows void internal_forward(operation_type *op) { op->bypass_t = NULL; if (my_concurrency < my_max_concurrency) op->bypass_t = perform_queued_requests(); if(op->bypass_t) op->status.store(SUCCEEDED, std::memory_order_release); else { forwarder_busy = false; op->status.store(FAILED, std::memory_order_release); } } graph_task* internal_try_put_bypass( const input_type& t ) { operation_type op_data(t, tryput_bypass); my_aggregator.execute(&op_data); if( op_data.status == SUCCEEDED ) { return op_data.bypass_t; } return NULL; } graph_task* try_put_task_impl( const input_type& t, /*lightweight=*/std::true_type ) { if( my_max_concurrency == 0 ) { return apply_body_bypass(t); } else { operation_type check_op(t, occupy_concurrency); my_aggregator.execute(&check_op); if( check_op.status == SUCCEEDED ) { return apply_body_bypass(t); } return internal_try_put_bypass(t); } } graph_task* try_put_task_impl( const input_type& t, /*lightweight=*/std::false_type ) { if( my_max_concurrency == 0 ) { return create_body_task(t); } else { return internal_try_put_bypass(t); } } //! Applies the body to the provided input // then decides if more work is available graph_task* apply_body_bypass( const input_type &i ) { return static_cast(this)->apply_body_impl_bypass(i); } //! allocates a task to apply a body graph_task* create_body_task( const input_type &input ) { if (!is_graph_active(my_graph_ref)) { return nullptr; } // TODO revamp: extract helper for common graph task allocation part small_object_allocator allocator{}; typedef apply_body_task_bypass task_type; graph_task* t = allocator.new_object( my_graph_ref, allocator, *this, input, my_priority ); graph_reference().reserve_wait(); return t; } //! 
This is executed by an enqueued task, the "forwarder" graph_task* forward_task() { operation_type op_data(try_fwd); graph_task* rval = NULL; do { op_data.status = WAIT; my_aggregator.execute(&op_data); if(op_data.status == SUCCEEDED) { graph_task* ttask = op_data.bypass_t; __TBB_ASSERT( ttask && ttask != SUCCESSFULLY_ENQUEUED, NULL ); rval = combine_tasks(my_graph_ref, rval, ttask); } } while (op_data.status == SUCCEEDED); return rval; } inline graph_task* create_forward_task() { if (!is_graph_active(my_graph_ref)) { return nullptr; } small_object_allocator allocator{}; typedef forward_task_bypass task_type; graph_task* t = allocator.new_object( graph_reference(), allocator, *this, my_priority ); graph_reference().reserve_wait(); return t; } //! Spawns a task that calls forward() inline void spawn_forward_task() { graph_task* tp = create_forward_task(); if(tp) { spawn_in_graph_arena(graph_reference(), *tp); } } node_priority_t priority() const override { return my_priority; } }; // function_input_base //! Implements methods for a function node that takes a type Input as input and sends // a type Output to its successors. template< typename Input, typename Output, typename Policy, typename A> class function_input : public function_input_base > { public: typedef Input input_type; typedef Output output_type; typedef function_body function_body_type; typedef function_input my_class; typedef function_input_base base_type; typedef function_input_queue input_queue_type; // constructor template function_input( graph &g, size_t max_concurrency, Body& body, node_priority_t a_priority ) : base_type(g, max_concurrency, a_priority, noexcept(body(input_type()))) , my_body( new function_body_leaf< input_type, output_type, Body>(body) ) , my_init_body( new function_body_leaf< input_type, output_type, Body>(body) ) { } //! 
Copy constructor function_input( const function_input& src ) : base_type(src), my_body( src.my_init_body->clone() ), my_init_body(src.my_init_body->clone() ) { } #if __INTEL_COMPILER <= 2021 // Suppress superfluous diagnostic about virtual keyword absence in a destructor of an inherited // class while the parent class has the virtual keyword for the destrocutor. virtual #endif ~function_input() { delete my_body; delete my_init_body; } template< typename Body > Body copy_function_object() { function_body_type &body_ref = *this->my_body; return dynamic_cast< function_body_leaf & >(body_ref).get_body(); } output_type apply_body_impl( const input_type& i) { // There is an extra copied needed to capture the // body execution without the try_put fgt_begin_body( my_body ); output_type v = (*my_body)(i); fgt_end_body( my_body ); return v; } //TODO: consider moving into the base class graph_task* apply_body_impl_bypass( const input_type &i) { output_type v = apply_body_impl(i); graph_task* postponed_task = NULL; if( base_type::my_max_concurrency != 0 ) { postponed_task = base_type::try_get_postponed_task(i); __TBB_ASSERT( !postponed_task || postponed_task != SUCCESSFULLY_ENQUEUED, NULL ); } if( postponed_task ) { // make the task available for other workers since we do not know successors' // execution policy spawn_in_graph_arena(base_type::graph_reference(), *postponed_task); } graph_task* successor_task = successors().try_put_task(v); #if _MSC_VER && !__INTEL_COMPILER #pragma warning (push) #pragma warning (disable: 4127) /* suppress conditional expression is constant */ #endif if(has_policy::value) { #if _MSC_VER && !__INTEL_COMPILER #pragma warning (pop) #endif if(!successor_task) { // Return confirmative status since current // node's body has been executed anyway successor_task = SUCCESSFULLY_ENQUEUED; } } return successor_task; } protected: void reset_function_input(reset_flags f) { base_type::reset_function_input_base(f); if(f & rf_reset_bodies) { function_body_type 
*tmp = my_init_body->clone(); delete my_body; my_body = tmp; } } function_body_type *my_body; function_body_type *my_init_body; virtual broadcast_cache &successors() = 0; }; // function_input // helper templates to clear the successor edges of the output ports of an multifunction_node template struct clear_element { template static void clear_this(P &p) { (void)std::get(p).successors().clear(); clear_element::clear_this(p); } #if TBB_USE_ASSERT template static bool this_empty(P &p) { if(std::get(p).successors().empty()) return clear_element::this_empty(p); return false; } #endif }; template<> struct clear_element<1> { template static void clear_this(P &p) { (void)std::get<0>(p).successors().clear(); } #if TBB_USE_ASSERT template static bool this_empty(P &p) { return std::get<0>(p).successors().empty(); } #endif }; template struct init_output_ports { template static OutputTuple call(graph& g, const std::tuple&) { return OutputTuple(Args(g)...); } }; // struct init_output_ports //! Implements methods for a function node that takes a type Input as input // and has a tuple of output ports specified. template< typename Input, typename OutputPortSet, typename Policy, typename A> class multifunction_input : public function_input_base > { public: static const int N = std::tuple_size::value; typedef Input input_type; typedef OutputPortSet output_ports_type; typedef multifunction_body multifunction_body_type; typedef multifunction_input my_class; typedef function_input_base base_type; typedef function_input_queue input_queue_type; // constructor template multifunction_input(graph &g, size_t max_concurrency,Body& body, node_priority_t a_priority ) : base_type(g, max_concurrency, a_priority, noexcept(body(input_type(), my_output_ports))) , my_body( new multifunction_body_leaf(body) ) , my_init_body( new multifunction_body_leaf(body) ) , my_output_ports(init_output_ports::call(g, my_output_ports)){ } //! 
Copy constructor multifunction_input( const multifunction_input& src ) : base_type(src), my_body( src.my_init_body->clone() ), my_init_body(src.my_init_body->clone() ), my_output_ports( init_output_ports::call(src.my_graph_ref, my_output_ports) ) { } ~multifunction_input() { delete my_body; delete my_init_body; } template< typename Body > Body copy_function_object() { multifunction_body_type &body_ref = *this->my_body; return *static_cast(dynamic_cast< multifunction_body_leaf & >(body_ref).get_body_ptr()); } // for multifunction nodes we do not have a single successor as such. So we just tell // the task we were successful. //TODO: consider moving common parts with implementation in function_input into separate function graph_task* apply_body_impl_bypass( const input_type &i ) { fgt_begin_body( my_body ); (*my_body)(i, my_output_ports); fgt_end_body( my_body ); graph_task* ttask = NULL; if(base_type::my_max_concurrency != 0) { ttask = base_type::try_get_postponed_task(i); } return ttask ? 
ttask : SUCCESSFULLY_ENQUEUED; } output_ports_type &output_ports(){ return my_output_ports; } protected: void reset(reset_flags f) { base_type::reset_function_input_base(f); if(f & rf_clear_edges)clear_element::clear_this(my_output_ports); if(f & rf_reset_bodies) { multifunction_body_type* tmp = my_init_body->clone(); delete my_body; my_body = tmp; } __TBB_ASSERT(!(f & rf_clear_edges) || clear_element::this_empty(my_output_ports), "multifunction_node reset failed"); } multifunction_body_type *my_body; multifunction_body_type *my_init_body; output_ports_type my_output_ports; }; // multifunction_input // template to refer to an output port of a multifunction_node template typename std::tuple_element::type &output_port(MOP &op) { return std::get(op.output_ports()); } inline void check_task_and_spawn(graph& g, graph_task* t) { if (t && t != SUCCESSFULLY_ENQUEUED) { spawn_in_graph_arena(g, *t); } } // helper structs for split_node template struct emit_element { template static graph_task* emit_this(graph& g, const T &t, P &p) { // TODO: consider to collect all the tasks in task_list and spawn them all at once graph_task* last_task = std::get(p).try_put_task(std::get(t)); check_task_and_spawn(g, last_task); return emit_element::emit_this(g,t,p); } }; template<> struct emit_element<1> { template static graph_task* emit_this(graph& g, const T &t, P &p) { graph_task* last_task = std::get<0>(p).try_put_task(std::get<0>(t)); check_task_and_spawn(g, last_task); return SUCCESSFULLY_ENQUEUED; } }; //! Implements methods for an executable node that takes continue_msg as input template< typename Output, typename Policy> class continue_input : public continue_receiver { public: //! The input type of this receiver typedef continue_msg input_type; //! 
The output type of this receiver typedef Output output_type; typedef function_body function_body_type; typedef continue_input class_type; template< typename Body > continue_input( graph &g, Body& body, node_priority_t a_priority ) : continue_receiver(/*number_of_predecessors=*/0, a_priority) , my_graph_ref(g) , my_body( new function_body_leaf< input_type, output_type, Body>(body) ) , my_init_body( new function_body_leaf< input_type, output_type, Body>(body) ) { } template< typename Body > continue_input( graph &g, int number_of_predecessors, Body& body, node_priority_t a_priority ) : continue_receiver( number_of_predecessors, a_priority ) , my_graph_ref(g) , my_body( new function_body_leaf< input_type, output_type, Body>(body) ) , my_init_body( new function_body_leaf< input_type, output_type, Body>(body) ) { } continue_input( const continue_input& src ) : continue_receiver(src), my_graph_ref(src.my_graph_ref), my_body( src.my_init_body->clone() ), my_init_body( src.my_init_body->clone() ) {} ~continue_input() { delete my_body; delete my_init_body; } template< typename Body > Body copy_function_object() { function_body_type &body_ref = *my_body; return dynamic_cast< function_body_leaf & >(body_ref).get_body(); } void reset_receiver( reset_flags f) override { continue_receiver::reset_receiver(f); if(f & rf_reset_bodies) { function_body_type *tmp = my_init_body->clone(); delete my_body; my_body = tmp; } } protected: graph& my_graph_ref; function_body_type *my_body; function_body_type *my_init_body; virtual broadcast_cache &successors() = 0; friend class apply_body_task_bypass< class_type, continue_msg >; //! 
Applies the body to the provided input graph_task* apply_body_bypass( input_type ) { // There is an extra copied needed to capture the // body execution without the try_put fgt_begin_body( my_body ); output_type v = (*my_body)( continue_msg() ); fgt_end_body( my_body ); return successors().try_put_task( v ); } graph_task* execute() override { if(!is_graph_active(my_graph_ref)) { return NULL; } #if _MSC_VER && !__INTEL_COMPILER #pragma warning (push) #pragma warning (disable: 4127) /* suppress conditional expression is constant */ #endif if(has_policy::value) { #if _MSC_VER && !__INTEL_COMPILER #pragma warning (pop) #endif return apply_body_bypass( continue_msg() ); } else { small_object_allocator allocator{}; typedef apply_body_task_bypass task_type; graph_task* t = allocator.new_object( graph_reference(), allocator, *this, continue_msg(), my_priority ); graph_reference().reserve_wait(); return t; } } graph& graph_reference() const override { return my_graph_ref; } }; // continue_input //! Implements methods for both executable and function nodes that puts Output to its successors template< typename Output > class function_output : public sender { public: template friend struct clear_element; typedef Output output_type; typedef typename sender::successor_type successor_type; typedef broadcast_cache broadcast_cache_type; function_output(graph& g) : my_successors(this), my_graph_ref(g) {} function_output(const function_output& other) = delete; //! Adds a new successor to this node bool register_successor( successor_type &r ) override { successors().register_successor( r ); return true; } //! 
Removes a successor from this node bool remove_successor( successor_type &r ) override { successors().remove_successor( r ); return true; } broadcast_cache_type &successors() { return my_successors; } graph& graph_reference() const { return my_graph_ref; } protected: broadcast_cache_type my_successors; graph& my_graph_ref; }; // function_output template< typename Output > class multifunction_output : public function_output { public: typedef Output output_type; typedef function_output base_type; using base_type::my_successors; multifunction_output(graph& g) : base_type(g) {} multifunction_output(const multifunction_output& other) : base_type(other.my_graph_ref) {} bool try_put(const output_type &i) { graph_task *res = try_put_task(i); if( !res ) return false; if( res != SUCCESSFULLY_ENQUEUED ) { // wrapping in task_arena::execute() is not needed since the method is called from // inside task::execute() spawn_in_graph_arena(graph_reference(), *res); } return true; } using base_type::graph_reference; protected: graph_task* try_put_task(const output_type &i) { return my_successors.try_put_task(i); } template friend struct emit_element; }; // multifunction_output //composite_node template void add_nodes_impl(CompositeType*, bool) {} template< typename CompositeType, typename NodeType1, typename... NodeTypes > void add_nodes_impl(CompositeType *c_node, bool visible, const NodeType1& n1, const NodeTypes&... n) { void *addr = const_cast(&n1); fgt_alias_port(c_node, addr, visible); add_nodes_impl(c_node, visible, n...); } #endif // __TBB__flow_graph_node_impl_H _flow_graph_node_set_impl.h000066400000000000000000000237011514453371700353270ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/* Copyright (c) 2020-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_flow_graph_node_set_impl_H #define __TBB_flow_graph_node_set_impl_H #ifndef __TBB_flow_graph_H #error Do not #include this internal file directly; use public TBB headers instead. #endif // Included in namespace tbb::detail::d1 (in flow_graph.h) #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET // Visual Studio 2019 reports an error while calling predecessor_selector::get and successor_selector::get // Seems like the well-formed expression in trailing decltype is treated as ill-formed // TODO: investigate problems with decltype in trailing return types or find the cross-platform solution #define __TBB_MSVC_DISABLE_TRAILING_DECLTYPE (_MSC_VER >= 1900) namespace order { struct undefined {}; struct following {}; struct preceding {}; } class get_graph_helper { public: // TODO: consider making graph_reference() public and consistent interface to get a reference to the graph // and remove get_graph_helper template static graph& get(const T& object) { return get_impl(object, std::is_base_of()); } private: // Get graph from the object of type derived from graph_node template static graph& get_impl(const T& object, std::true_type) { return static_cast(&object)->my_graph; } template static graph& get_impl(const T& object, std::false_type) { return object.graph_reference(); } }; template struct node_set { typedef Order order_type; std::tuple nodes; node_set(Nodes&... ns) : nodes(ns...) 
{} template node_set(const node_set& set) : nodes(set.nodes) {} graph& graph_reference() const { return get_graph_helper::get(std::get<0>(nodes)); } }; namespace alias_helpers { template using output_type = typename T::output_type; template using output_ports_type = typename T::output_ports_type; template using input_type = typename T::input_type; template using input_ports_type = typename T::input_ports_type; } // namespace alias_helpers template using has_output_type = supports; template using has_input_type = supports; template using has_input_ports_type = supports; template using has_output_ports_type = supports; template struct is_sender : std::is_base_of, T> {}; template struct is_receiver : std::is_base_of, T> {}; template struct is_async_node : std::false_type {}; template struct is_async_node> : std::true_type {}; template node_set follows(FirstPredecessor& first_predecessor, Predecessors&... predecessors) { static_assert((conjunction, has_output_type...>::value), "Not all node's predecessors has output_type typedef"); static_assert((conjunction, is_sender...>::value), "Not all node's predecessors are senders"); return node_set(first_predecessor, predecessors...); } template node_set follows(node_set& predecessors_set) { static_assert((conjunction...>::value), "Not all nodes in the set has output_type typedef"); static_assert((conjunction...>::value), "Not all nodes in the set are senders"); return node_set(predecessors_set); } template node_set precedes(FirstSuccessor& first_successor, Successors&... 
successors) { static_assert((conjunction, has_input_type...>::value), "Not all node's successors has input_type typedef"); static_assert((conjunction, is_receiver...>::value), "Not all node's successors are receivers"); return node_set(first_successor, successors...); } template node_set precedes(node_set& successors_set) { static_assert((conjunction...>::value), "Not all nodes in the set has input_type typedef"); static_assert((conjunction...>::value), "Not all nodes in the set are receivers"); return node_set(successors_set); } template node_set make_node_set(Node& first_node, Nodes&... nodes) { return node_set(first_node, nodes...); } template class successor_selector { template static auto get_impl(NodeType& node, std::true_type) -> decltype(input_port(node)) { return input_port(node); } template static NodeType& get_impl(NodeType& node, std::false_type) { return node; } public: template #if __TBB_MSVC_DISABLE_TRAILING_DECLTYPE static auto& get(NodeType& node) #else static auto get(NodeType& node) -> decltype(get_impl(node, has_input_ports_type())) #endif { return get_impl(node, has_input_ports_type()); } }; template class predecessor_selector { template static auto internal_get(NodeType& node, std::true_type) -> decltype(output_port(node)) { return output_port(node); } template static NodeType& internal_get(NodeType& node, std::false_type) { return node;} template #if __TBB_MSVC_DISABLE_TRAILING_DECLTYPE static auto& get_impl(NodeType& node, std::false_type) #else static auto get_impl(NodeType& node, std::false_type) -> decltype(internal_get(node, has_output_ports_type())) #endif { return internal_get(node, has_output_ports_type()); } template static AsyncNode& get_impl(AsyncNode& node, std::true_type) { return node; } public: template #if __TBB_MSVC_DISABLE_TRAILING_DECLTYPE static auto& get(NodeType& node) #else static auto get(NodeType& node) -> decltype(get_impl(node, is_async_node())) #endif { return get_impl(node, is_async_node()); } }; template class 
make_edges_helper { public: template static void connect_predecessors(PredecessorsTuple& predecessors, NodeType& node) { make_edge(std::get(predecessors), successor_selector::get(node)); make_edges_helper::connect_predecessors(predecessors, node); } template static void connect_successors(NodeType& node, SuccessorsTuple& successors) { make_edge(predecessor_selector::get(node), std::get(successors)); make_edges_helper::connect_successors(node, successors); } }; template<> struct make_edges_helper<0> { template static void connect_predecessors(PredecessorsTuple& predecessors, NodeType& node) { make_edge(std::get<0>(predecessors), successor_selector<0>::get(node)); } template static void connect_successors(NodeType& node, SuccessorsTuple& successors) { make_edge(predecessor_selector<0>::get(node), std::get<0>(successors)); } }; // TODO: consider adding an overload for making edges between node sets template void make_edges(const node_set& s, NodeType& node) { const std::size_t SetSize = std::tuple_size::value; make_edges_helper::connect_predecessors(s.nodes, node); } template void make_edges(NodeType& node, const node_set& s) { const std::size_t SetSize = std::tuple_size::value; make_edges_helper::connect_successors(node, s.nodes); } template void make_edges_in_order(const node_set& ns, NodeType& node) { make_edges(ns, node); } template void make_edges_in_order(const node_set& ns, NodeType& node) { make_edges(node, ns); } #endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET #endif // __TBB_flow_graph_node_set_impl_H _flow_graph_nodes_deduction.h000066400000000000000000000224741514453371700356620ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_flow_graph_nodes_deduction_H #define __TBB_flow_graph_nodes_deduction_H #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT namespace tbb { namespace detail { namespace d1 { template struct declare_body_types { using input_type = Input; using output_type = Output; }; struct NoInputBody {}; template struct declare_body_types { using output_type = Output; }; template struct body_types; template struct body_types : declare_body_types {}; template struct body_types : declare_body_types {}; template struct body_types : declare_body_types {}; template struct body_types : declare_body_types {}; template struct body_types : declare_body_types {}; template struct body_types : declare_body_types {}; template struct body_types : declare_body_types {}; template struct body_types : declare_body_types {}; template struct body_types : declare_body_types {}; template using input_t = typename body_types::input_type; template using output_t = typename body_types::output_type; template auto decide_on_operator_overload(Output (T::*name)(const Input&) const)->decltype(name); template auto decide_on_operator_overload(Output (T::*name)(const Input&))->decltype(name); template auto decide_on_operator_overload(Output (T::*name)(Input&) const)->decltype(name); template auto decide_on_operator_overload(Output (T::*name)(Input&))->decltype(name); template auto decide_on_operator_overload(Output (*name)(const Input&))->decltype(name); template auto decide_on_operator_overload(Output (*name)(Input&))->decltype(name); template decltype(decide_on_operator_overload(&Body::operator())) 
decide_on_callable_type(int); template decltype(decide_on_operator_overload(std::declval())) decide_on_callable_type(...); // Deduction guides for Flow Graph nodes template input_node(GraphOrSet&&, Body) ->input_node(0))>>; #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template struct decide_on_set; template struct decide_on_set> { using type = typename Node::output_type; }; template struct decide_on_set> { using type = typename Node::input_type; }; template using decide_on_set_t = typename decide_on_set>::type; template broadcast_node(const NodeSet&) ->broadcast_node>; template buffer_node(const NodeSet&) ->buffer_node>; template queue_node(const NodeSet&) ->queue_node>; #endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template sequencer_node(GraphOrProxy&&, Sequencer) ->sequencer_node(0))>>; #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template priority_queue_node(const NodeSet&, const Compare&) ->priority_queue_node, Compare>; template priority_queue_node(const NodeSet&) ->priority_queue_node, std::less>>; #endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template struct join_key { using type = Key; }; template struct join_key { using type = T&; }; template using join_key_t = typename join_key::type; #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template join_node(const node_set&, Policy) ->join_node, Policy>; template join_node(const node_set&, Policy) ->join_node; template join_node(const node_set) ->join_node, queueing>; template join_node(const node_set) ->join_node; #endif template join_node(GraphOrProxy&&, Body, Bodies...) 
->join_node(0))>, input_t(0))>...>, key_matching(0))>>>>; #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template indexer_node(const node_set&) ->indexer_node; #endif #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template limiter_node(const NodeSet&, size_t) ->limiter_node>; template split_node(const node_set&) ->split_node; template split_node(const node_set&) ->split_node>; #endif template function_node(GraphOrSet&&, size_t, Body, Policy, node_priority_t = no_priority) ->function_node(0))>, output_t(0))>, Policy>; template function_node(GraphOrSet&&, size_t, Body, node_priority_t = no_priority) ->function_node(0))>, output_t(0))>, queueing>; template struct continue_output { using type = Output; }; template <> struct continue_output { using type = continue_msg; }; template using continue_output_t = typename continue_output::type; template continue_node(GraphOrSet&&, Body, Policy, node_priority_t = no_priority) ->continue_node>, Policy>; template continue_node(GraphOrSet&&, int, Body, Policy, node_priority_t = no_priority) ->continue_node>, Policy>; template continue_node(GraphOrSet&&, Body, node_priority_t = no_priority) ->continue_node>, Policy>; template continue_node(GraphOrSet&&, int, Body, node_priority_t = no_priority) ->continue_node>, Policy>; #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template overwrite_node(const NodeSet&) ->overwrite_node>; template write_once_node(const NodeSet&) ->write_once_node>; #endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET } // namespace d1 } // namespace detail } // namespace tbb #endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT #endif // __TBB_flow_graph_nodes_deduction_H _flow_graph_tagged_buffer_impl.h000066400000000000000000000241351514453371700363150ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ // a hash table buffer that can expand, and can support as many deletions as // additions, list-based, with elements of list held in array (for destruction // management), multiplicative hashing (like ets). No synchronization built-in. // #ifndef __TBB__flow_graph_hash_buffer_impl_H #define __TBB__flow_graph_hash_buffer_impl_H #ifndef __TBB_flow_graph_H #error Do not #include this internal file directly; use public TBB headers instead. #endif // included in namespace tbb::flow::interfaceX::internal // elements in the table are a simple list; we need pointer to next element to // traverse the chain template struct buffer_element_type { // the second parameter below is void * because we can't forward-declare the type // itself, so we just reinterpret_cast below. 
typedef typename aligned_pair::type type; }; template < typename Key, // type of key within ValueType typename ValueType, typename ValueToKey, // abstract method that returns "const Key" or "const Key&" given ValueType typename HashCompare, // has hash and equal typename Allocator=tbb::cache_aligned_allocator< typename aligned_pair::type > > class hash_buffer : public HashCompare { public: static const size_t INITIAL_SIZE = 8; // initial size of the hash pointer table typedef ValueType value_type; typedef typename buffer_element_type< value_type >::type element_type; typedef value_type *pointer_type; typedef element_type *list_array_type; // array we manage manually typedef list_array_type *pointer_array_type; typedef typename std::allocator_traits::template rebind_alloc pointer_array_allocator_type; typedef typename std::allocator_traits::template rebind_alloc elements_array_allocator; typedef typename std::decay::type Knoref; private: ValueToKey *my_key; size_t my_size; size_t nelements; pointer_array_type pointer_array; // pointer_array[my_size] list_array_type elements_array; // elements_array[my_size / 2] element_type* free_list; size_t mask() { return my_size - 1; } void set_up_free_list( element_type **p_free_list, list_array_type la, size_t sz) { for(size_t i=0; i < sz - 1; ++i ) { // construct free list la[i].second = &(la[i+1]); } la[sz-1].second = NULL; *p_free_list = (element_type *)&(la[0]); } // cleanup for exceptions struct DoCleanup { pointer_array_type *my_pa; list_array_type *my_elements; size_t my_size; DoCleanup(pointer_array_type &pa, list_array_type &my_els, size_t sz) : my_pa(&pa), my_elements(&my_els), my_size(sz) { } ~DoCleanup() { if(my_pa) { size_t dont_care = 0; internal_free_buffer(*my_pa, *my_elements, my_size, dont_care); } } }; // exception-safety requires we do all the potentially-throwing operations first void grow_array() { size_t new_size = my_size*2; size_t new_nelements = nelements; // internal_free_buffer zeroes this 
list_array_type new_elements_array = NULL; pointer_array_type new_pointer_array = NULL; list_array_type new_free_list = NULL; { DoCleanup my_cleanup(new_pointer_array, new_elements_array, new_size); new_elements_array = elements_array_allocator().allocate(my_size); new_pointer_array = pointer_array_allocator_type().allocate(new_size); for(size_t i=0; i < new_size; ++i) new_pointer_array[i] = NULL; set_up_free_list(&new_free_list, new_elements_array, my_size ); for(size_t i=0; i < my_size; ++i) { for( element_type* op = pointer_array[i]; op; op = (element_type *)(op->second)) { value_type *ov = reinterpret_cast(&(op->first)); // could have std::move semantics internal_insert_with_key(new_pointer_array, new_size, new_free_list, *ov); } } my_cleanup.my_pa = NULL; my_cleanup.my_elements = NULL; } internal_free_buffer(pointer_array, elements_array, my_size, nelements); free_list = new_free_list; pointer_array = new_pointer_array; elements_array = new_elements_array; my_size = new_size; nelements = new_nelements; } // v should have perfect forwarding if std::move implemented. 
// we use this method to move elements in grow_array, so can't use class fields void internal_insert_with_key( element_type **p_pointer_array, size_t p_sz, list_array_type &p_free_list, const value_type &v) { size_t l_mask = p_sz-1; __TBB_ASSERT(my_key, "Error: value-to-key functor not provided"); size_t h = this->hash((*my_key)(v)) & l_mask; __TBB_ASSERT(p_free_list, "Error: free list not set up."); element_type* my_elem = p_free_list; p_free_list = (element_type *)(p_free_list->second); (void) new(&(my_elem->first)) value_type(v); my_elem->second = p_pointer_array[h]; p_pointer_array[h] = my_elem; } void internal_initialize_buffer() { pointer_array = pointer_array_allocator_type().allocate(my_size); for(size_t i = 0; i < my_size; ++i) pointer_array[i] = NULL; elements_array = elements_array_allocator().allocate(my_size / 2); set_up_free_list(&free_list, elements_array, my_size / 2); } // made static so an enclosed class can use to properly dispose of the internals static void internal_free_buffer( pointer_array_type &pa, list_array_type &el, size_t &sz, size_t &ne ) { if(pa) { for(size_t i = 0; i < sz; ++i ) { element_type *p_next; for( element_type *p = pa[i]; p; p = p_next) { p_next = (element_type *)p->second; // TODO revamp: make sure type casting is correct. void* ptr = (void*)(p->first); #if _MSC_VER && _MSC_VER <= 1900 && !__INTEL_COMPILER suppress_unused_warning(ptr); #endif ((value_type*)ptr)->~value_type(); } } pointer_array_allocator_type().deallocate(pa, sz); pa = NULL; } // Separate test (if allocation of pa throws, el may be allocated. // but no elements will be constructed.) 
if(el) { elements_array_allocator().deallocate(el, sz / 2); el = NULL; } sz = INITIAL_SIZE; ne = 0; } public: hash_buffer() : my_key(NULL), my_size(INITIAL_SIZE), nelements(0) { internal_initialize_buffer(); } ~hash_buffer() { internal_free_buffer(pointer_array, elements_array, my_size, nelements); delete my_key; my_key = nullptr; } hash_buffer(const hash_buffer&) = delete; hash_buffer& operator=(const hash_buffer&) = delete; void reset() { internal_free_buffer(pointer_array, elements_array, my_size, nelements); internal_initialize_buffer(); } // Take ownership of func object allocated with new. // This method is only used internally, so can't be misused by user. void set_key_func(ValueToKey *vtk) { my_key = vtk; } // pointer is used to clone() ValueToKey* get_key_func() { return my_key; } bool insert_with_key(const value_type &v) { pointer_type p = NULL; __TBB_ASSERT(my_key, "Error: value-to-key functor not provided"); if(find_ref_with_key((*my_key)(v), p)) { p->~value_type(); (void) new(p) value_type(v); // copy-construct into the space return false; } ++nelements; if(nelements*2 > my_size) grow_array(); internal_insert_with_key(pointer_array, my_size, free_list, v); return true; } // returns true and sets v to array element if found, else returns false. 
bool find_ref_with_key(const Knoref& k, pointer_type &v) { size_t i = this->hash(k) & mask(); for(element_type* p = pointer_array[i]; p; p = (element_type *)(p->second)) { pointer_type pv = reinterpret_cast(&(p->first)); __TBB_ASSERT(my_key, "Error: value-to-key functor not provided"); if(this->equal((*my_key)(*pv), k)) { v = pv; return true; } } return false; } bool find_with_key( const Knoref& k, value_type &v) { value_type *p; if(find_ref_with_key(k, p)) { v = *p; return true; } else return false; } void delete_with_key(const Knoref& k) { size_t h = this->hash(k) & mask(); element_type* prev = NULL; for(element_type* p = pointer_array[h]; p; prev = p, p = (element_type *)(p->second)) { value_type *vp = reinterpret_cast(&(p->first)); __TBB_ASSERT(my_key, "Error: value-to-key functor not provided"); if(this->equal((*my_key)(*vp), k)) { vp->~value_type(); if(prev) prev->second = p->second; else pointer_array[h] = (element_type *)(p->second); p->second = free_list; free_list = p; --nelements; return; } } __TBB_ASSERT(false, "key not found for delete"); } }; #endif // __TBB__flow_graph_hash_buffer_impl_H _flow_graph_trace_impl.h000066400000000000000000000371461514453371700346350ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #ifndef _FGT_GRAPH_TRACE_IMPL_H #define _FGT_GRAPH_TRACE_IMPL_H #include "../profiling.h" #if (_MSC_VER >= 1900) #include #endif namespace tbb { namespace detail { namespace d1 { template< typename T > class sender; template< typename T > class receiver; #if TBB_USE_PROFILING_TOOLS #if __TBB_FLOW_TRACE_CODEPTR #if (_MSC_VER >= 1900) #define CODEPTR() (_ReturnAddress()) #elif __TBB_GCC_VERSION >= 40800 #define CODEPTR() ( __builtin_return_address(0)) #else #define CODEPTR() NULL #endif #else #define CODEPTR() NULL #endif /* __TBB_FLOW_TRACE_CODEPTR */ static inline void fgt_alias_port(void *node, void *p, bool visible) { if(visible) itt_relation_add( ITT_DOMAIN_FLOW, node, FLOW_NODE, __itt_relation_is_parent_of, p, FLOW_NODE ); else itt_relation_add( ITT_DOMAIN_FLOW, p, FLOW_NODE, __itt_relation_is_child_of, node, FLOW_NODE ); } static inline void fgt_composite ( void* codeptr, void *node, void *graph ) { itt_make_task_group( ITT_DOMAIN_FLOW, node, FLOW_NODE, graph, FLOW_GRAPH, FLOW_COMPOSITE_NODE ); suppress_unused_warning( codeptr ); #if __TBB_FLOW_TRACE_CODEPTR if (codeptr != NULL) { register_node_addr(ITT_DOMAIN_FLOW, node, FLOW_NODE, CODE_ADDRESS, &codeptr); } #endif } static inline void fgt_internal_alias_input_port( void *node, void *p, string_resource_index name_index ) { itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_INPUT_PORT, node, FLOW_NODE, name_index ); itt_relation_add( ITT_DOMAIN_FLOW, node, FLOW_NODE, __itt_relation_is_parent_of, p, FLOW_INPUT_PORT ); } static inline void fgt_internal_alias_output_port( void *node, void *p, string_resource_index name_index ) { itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_OUTPUT_PORT, node, FLOW_NODE, name_index ); itt_relation_add( ITT_DOMAIN_FLOW, node, FLOW_NODE, __itt_relation_is_parent_of, p, FLOW_OUTPUT_PORT ); } template void alias_input_port(void *node, receiver* port, string_resource_index name_index) { // TODO: Make fgt_internal_alias_input_port a function template? 
fgt_internal_alias_input_port( node, port, name_index); } template < typename PortsTuple, int N > struct fgt_internal_input_alias_helper { static void alias_port( void *node, PortsTuple &ports ) { alias_input_port( node, &(std::get(ports)), static_cast(FLOW_INPUT_PORT_0 + N - 1) ); fgt_internal_input_alias_helper::alias_port( node, ports ); } }; template < typename PortsTuple > struct fgt_internal_input_alias_helper { static void alias_port( void * /* node */, PortsTuple & /* ports */ ) { } }; template void alias_output_port(void *node, sender* port, string_resource_index name_index) { // TODO: Make fgt_internal_alias_output_port a function template? fgt_internal_alias_output_port( node, static_cast(port), name_index); } template < typename PortsTuple, int N > struct fgt_internal_output_alias_helper { static void alias_port( void *node, PortsTuple &ports ) { alias_output_port( node, &(std::get(ports)), static_cast(FLOW_OUTPUT_PORT_0 + N - 1) ); fgt_internal_output_alias_helper::alias_port( node, ports ); } }; template < typename PortsTuple > struct fgt_internal_output_alias_helper { static void alias_port( void * /*node*/, PortsTuple &/*ports*/ ) { } }; static inline void fgt_internal_create_input_port( void *node, void *p, string_resource_index name_index ) { itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_INPUT_PORT, node, FLOW_NODE, name_index ); } static inline void fgt_internal_create_output_port( void* codeptr, void *node, void *p, string_resource_index name_index ) { itt_make_task_group(ITT_DOMAIN_FLOW, p, FLOW_OUTPUT_PORT, node, FLOW_NODE, name_index); suppress_unused_warning( codeptr ); #if __TBB_FLOW_TRACE_CODEPTR if (codeptr != NULL) { register_node_addr(ITT_DOMAIN_FLOW, node, FLOW_NODE, CODE_ADDRESS, &codeptr); } #endif } template void register_input_port(void *node, receiver* port, string_resource_index name_index) { // TODO: Make fgt_internal_create_input_port a function template? 
fgt_internal_create_input_port(node, static_cast(port), name_index); } template < typename PortsTuple, int N > struct fgt_internal_input_helper { static void register_port( void *node, PortsTuple &ports ) { register_input_port( node, &(std::get(ports)), static_cast(FLOW_INPUT_PORT_0 + N - 1) ); fgt_internal_input_helper::register_port( node, ports ); } }; template < typename PortsTuple > struct fgt_internal_input_helper { static void register_port( void *node, PortsTuple &ports ) { register_input_port( node, &(std::get<0>(ports)), FLOW_INPUT_PORT_0 ); } }; template void register_output_port(void* codeptr, void *node, sender* port, string_resource_index name_index) { // TODO: Make fgt_internal_create_output_port a function template? fgt_internal_create_output_port( codeptr, node, static_cast(port), name_index); } template < typename PortsTuple, int N > struct fgt_internal_output_helper { static void register_port( void* codeptr, void *node, PortsTuple &ports ) { register_output_port( codeptr, node, &(std::get(ports)), static_cast(FLOW_OUTPUT_PORT_0 + N - 1) ); fgt_internal_output_helper::register_port( codeptr, node, ports ); } }; template < typename PortsTuple > struct fgt_internal_output_helper { static void register_port( void* codeptr, void *node, PortsTuple &ports ) { register_output_port( codeptr, node, &(std::get<0>(ports)), FLOW_OUTPUT_PORT_0 ); } }; template< typename NodeType > void fgt_multioutput_node_desc( const NodeType *node, const char *desc ) { void *addr = (void *)( static_cast< receiver< typename NodeType::input_type > * >(const_cast< NodeType *>(node)) ); itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc ); } template< typename NodeType > void fgt_multiinput_multioutput_node_desc( const NodeType *node, const char *desc ) { void *addr = const_cast(node); itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc ); } template< typename NodeType > static inline void fgt_node_desc( const NodeType 
*node, const char *desc ) { void *addr = (void *)( static_cast< sender< typename NodeType::output_type > * >(const_cast< NodeType *>(node)) ); itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc ); } static inline void fgt_graph_desc( const void *g, const char *desc ) { void *addr = const_cast< void *>(g); itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_GRAPH, FLOW_OBJECT_NAME, desc ); } static inline void fgt_body( void *node, void *body ) { itt_relation_add( ITT_DOMAIN_FLOW, body, FLOW_BODY, __itt_relation_is_child_of, node, FLOW_NODE ); } template< int N, typename PortsTuple > static inline void fgt_multioutput_node(void* codeptr, string_resource_index t, void *g, void *input_port, PortsTuple &ports ) { itt_make_task_group( ITT_DOMAIN_FLOW, input_port, FLOW_NODE, g, FLOW_GRAPH, t ); fgt_internal_create_input_port( input_port, input_port, FLOW_INPUT_PORT_0 ); fgt_internal_output_helper::register_port(codeptr, input_port, ports ); } template< int N, typename PortsTuple > static inline void fgt_multioutput_node_with_body( void* codeptr, string_resource_index t, void *g, void *input_port, PortsTuple &ports, void *body ) { itt_make_task_group( ITT_DOMAIN_FLOW, input_port, FLOW_NODE, g, FLOW_GRAPH, t ); fgt_internal_create_input_port( input_port, input_port, FLOW_INPUT_PORT_0 ); fgt_internal_output_helper::register_port( codeptr, input_port, ports ); fgt_body( input_port, body ); } template< int N, typename PortsTuple > static inline void fgt_multiinput_node( void* codeptr, string_resource_index t, void *g, PortsTuple &ports, void *output_port) { itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t ); fgt_internal_create_output_port( codeptr, output_port, output_port, FLOW_OUTPUT_PORT_0 ); fgt_internal_input_helper::register_port( output_port, ports ); } static inline void fgt_multiinput_multioutput_node( void* codeptr, string_resource_index t, void *n, void *g ) { itt_make_task_group( ITT_DOMAIN_FLOW, n, 
FLOW_NODE, g, FLOW_GRAPH, t ); suppress_unused_warning( codeptr ); #if __TBB_FLOW_TRACE_CODEPTR if (codeptr != NULL) { register_node_addr(ITT_DOMAIN_FLOW, n, FLOW_NODE, CODE_ADDRESS, &codeptr); } #endif } static inline void fgt_node( void* codeptr, string_resource_index t, void *g, void *output_port ) { itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t ); fgt_internal_create_output_port( codeptr, output_port, output_port, FLOW_OUTPUT_PORT_0 ); } static void fgt_node_with_body( void* codeptr, string_resource_index t, void *g, void *output_port, void *body ) { itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t ); fgt_internal_create_output_port(codeptr, output_port, output_port, FLOW_OUTPUT_PORT_0 ); fgt_body( output_port, body ); } static inline void fgt_node( void* codeptr, string_resource_index t, void *g, void *input_port, void *output_port ) { fgt_node( codeptr, t, g, output_port ); fgt_internal_create_input_port( output_port, input_port, FLOW_INPUT_PORT_0 ); } static inline void fgt_node_with_body( void* codeptr, string_resource_index t, void *g, void *input_port, void *output_port, void *body ) { fgt_node_with_body( codeptr, t, g, output_port, body ); fgt_internal_create_input_port( output_port, input_port, FLOW_INPUT_PORT_0 ); } static inline void fgt_node( void* codeptr, string_resource_index t, void *g, void *input_port, void *decrement_port, void *output_port ) { fgt_node( codeptr, t, g, input_port, output_port ); fgt_internal_create_input_port( output_port, decrement_port, FLOW_INPUT_PORT_1 ); } static inline void fgt_make_edge( void *output_port, void *input_port ) { itt_relation_add( ITT_DOMAIN_FLOW, output_port, FLOW_OUTPUT_PORT, __itt_relation_is_predecessor_to, input_port, FLOW_INPUT_PORT); } static inline void fgt_remove_edge( void *output_port, void *input_port ) { itt_relation_add( ITT_DOMAIN_FLOW, output_port, FLOW_OUTPUT_PORT, __itt_relation_is_sibling_of, input_port, FLOW_INPUT_PORT); } 
static inline void fgt_graph( void *g ) { itt_make_task_group( ITT_DOMAIN_FLOW, g, FLOW_GRAPH, NULL, FLOW_NULL, FLOW_GRAPH ); } static inline void fgt_begin_body( void *body ) { itt_task_begin( ITT_DOMAIN_FLOW, body, FLOW_BODY, NULL, FLOW_NULL, FLOW_BODY ); } static inline void fgt_end_body( void * ) { itt_task_end( ITT_DOMAIN_FLOW ); } static inline void fgt_async_try_put_begin( void *node, void *port ) { itt_task_begin( ITT_DOMAIN_FLOW, port, FLOW_OUTPUT_PORT, node, FLOW_NODE, FLOW_OUTPUT_PORT ); } static inline void fgt_async_try_put_end( void *, void * ) { itt_task_end( ITT_DOMAIN_FLOW ); } static inline void fgt_async_reserve( void *node, void *graph ) { itt_region_begin( ITT_DOMAIN_FLOW, node, FLOW_NODE, graph, FLOW_GRAPH, FLOW_NULL ); } static inline void fgt_async_commit( void *node, void * /*graph*/) { itt_region_end( ITT_DOMAIN_FLOW, node, FLOW_NODE ); } static inline void fgt_reserve_wait( void *graph ) { itt_region_begin( ITT_DOMAIN_FLOW, graph, FLOW_GRAPH, NULL, FLOW_NULL, FLOW_NULL ); } static inline void fgt_release_wait( void *graph ) { itt_region_end( ITT_DOMAIN_FLOW, graph, FLOW_GRAPH ); } #else // TBB_USE_PROFILING_TOOLS #define CODEPTR() NULL static inline void fgt_alias_port(void * /*node*/, void * /*p*/, bool /*visible*/ ) { } static inline void fgt_composite ( void* /*codeptr*/, void * /*node*/, void * /*graph*/ ) { } static inline void fgt_graph( void * /*g*/ ) { } template< typename NodeType > static inline void fgt_multioutput_node_desc( const NodeType * /*node*/, const char * /*desc*/ ) { } template< typename NodeType > static inline void fgt_node_desc( const NodeType * /*node*/, const char * /*desc*/ ) { } static inline void fgt_graph_desc( const void * /*g*/, const char * /*desc*/ ) { } template< int N, typename PortsTuple > static inline void fgt_multioutput_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, PortsTuple & /*ports*/ ) { } template< int N, typename PortsTuple > static inline void 
fgt_multioutput_node_with_body( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, PortsTuple & /*ports*/, void * /*body*/ ) { } template< int N, typename PortsTuple > static inline void fgt_multiinput_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, PortsTuple & /*ports*/, void * /*output_port*/ ) { } static inline void fgt_multiinput_multioutput_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*node*/, void * /*graph*/ ) { } static inline void fgt_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*output_port*/ ) { } static inline void fgt_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*decrement_port*/, void * /*output_port*/ ) { } static inline void fgt_node_with_body( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*output_port*/, void * /*body*/ ) { } static inline void fgt_node_with_body( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*output_port*/, void * /*body*/ ) { } static inline void fgt_make_edge( void * /*output_port*/, void * /*input_port*/ ) { } static inline void fgt_remove_edge( void * /*output_port*/, void * /*input_port*/ ) { } static inline void fgt_begin_body( void * /*body*/ ) { } static inline void fgt_end_body( void * /*body*/) { } static inline void fgt_async_try_put_begin( void * /*node*/, void * /*port*/ ) { } static inline void fgt_async_try_put_end( void * /*node*/ , void * /*port*/ ) { } static inline void fgt_async_reserve( void * /*node*/, void * /*graph*/ ) { } static inline void fgt_async_commit( void * /*node*/, void * /*graph*/ ) { } static inline void fgt_reserve_wait( void * /*graph*/ ) { } static inline void fgt_release_wait( void * /*graph*/ ) { } template< typename NodeType > void fgt_multiinput_multioutput_node_desc( const NodeType * /*node*/, const char * /*desc*/ ) { } template < typename 
PortsTuple, int N > struct fgt_internal_input_alias_helper { static void alias_port( void * /*node*/, PortsTuple & /*ports*/ ) { } }; template < typename PortsTuple, int N > struct fgt_internal_output_alias_helper { static void alias_port( void * /*node*/, PortsTuple & /*ports*/ ) { } }; #endif // TBB_USE_PROFILING_TOOLS } // d1 } // namespace detail } // namespace tbb #endif // _FGT_GRAPH_TRACE_IMPL_H _flow_graph_types_impl.h000066400000000000000000000364071514453371700347020ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB__flow_graph_types_impl_H #define __TBB__flow_graph_types_impl_H #ifndef __TBB_flow_graph_H #error Do not #include this internal file directly; use public TBB headers instead. #endif // included in namespace tbb::detail::d1 // the change to key_matching (adding a K and KHash template parameter, making it a class) // means we have to pass this data to the key_matching_port. All the ports have only one // template parameter, so we have to wrap the following types in a trait: // // . K == key_type // . KHash == hash and compare for Key // . TtoK == function_body that given an object of T, returns its K // . 
T == type accepted by port, and stored in the hash table // // The port will have an additional parameter on node construction, which is a function_body // that accepts a const T& and returns a K which is the field in T which is its K. template struct KeyTrait { typedef Kp K; typedef Tp T; typedef type_to_key_function_body TtoK; typedef KHashp KHash; }; // wrap each element of a tuple in a template, and make a tuple of the result. template class PT, typename TypeTuple> struct wrap_tuple_elements; // A wrapper that generates the traits needed for each port of a key-matching join, // and the type of the tuple of input ports. template class PT, typename KeyTraits, typename TypeTuple> struct wrap_key_tuple_elements; template class PT, typename... Args> struct wrap_tuple_elements >{ typedef typename std::tuple... > type; }; template class PT, typename KeyTraits, typename... Args> struct wrap_key_tuple_elements > { typedef typename KeyTraits::key_type K; typedef typename KeyTraits::hash_compare_type KHash; typedef typename std::tuple >... > type; }; template< int... S > class sequence {}; template< int N, int... S > struct make_sequence : make_sequence < N - 1, N - 1, S... > {}; template< int... S > struct make_sequence < 0, S... > { typedef sequence type; }; //! type mimicking std::pair but with trailing fill to ensure each element of an array //* will have the correct alignment template struct type_plus_align { char first[sizeof(T1)]; T2 second; char fill1[REM]; }; template struct type_plus_align { char first[sizeof(T1)]; T2 second; }; template struct alignment_of { typedef struct { char t; U padded; } test_alignment; static const size_t value = sizeof(test_alignment) - sizeof(U); }; // T1, T2 are actual types stored. The space defined for T1 in the type returned // is a char array of the correct size. Type T2 should be trivially-constructible, // T1 must be explicitly managed. 
template struct aligned_pair { static const size_t t1_align = alignment_of::value; static const size_t t2_align = alignment_of::value; typedef type_plus_align just_pair; static const size_t max_align = t1_align < t2_align ? t2_align : t1_align; static const size_t extra_bytes = sizeof(just_pair) % max_align; static const size_t remainder = extra_bytes ? max_align - extra_bytes : 0; public: typedef type_plus_align type; }; // aligned_pair // support for variant type // type we use when we're not storing a value struct default_constructed { }; // type which contains another type, tests for what type is contained, and references to it. // Wrapper // void CopyTo( void *newSpace) : builds a Wrapper copy of itself in newSpace // struct to allow us to copy and test the type of objects struct WrapperBase { virtual ~WrapperBase() {} virtual void CopyTo(void* /*newSpace*/) const = 0; }; // Wrapper contains a T, with the ability to test what T is. The Wrapper can be // constructed from a T, can be copy-constructed from another Wrapper, and can be // examined via value(), but not modified. 
template struct Wrapper: public WrapperBase { typedef T value_type; typedef T* pointer_type; private: T value_space; public: const value_type &value() const { return value_space; } private: Wrapper(); // on exception will ensure the Wrapper will contain only a trivially-constructed object struct _unwind_space { pointer_type space; _unwind_space(pointer_type p) : space(p) {} ~_unwind_space() { if(space) (void) new (space) Wrapper(default_constructed()); } }; public: explicit Wrapper( const T& other ) : value_space(other) { } explicit Wrapper(const Wrapper& other) = delete; void CopyTo(void* newSpace) const override { _unwind_space guard((pointer_type)newSpace); (void) new(newSpace) Wrapper(value_space); guard.space = NULL; } ~Wrapper() { } }; // specialization for array objects template struct Wrapper : public WrapperBase { typedef T value_type; typedef T* pointer_type; // space must be untyped. typedef T ArrayType[N]; private: // The space is not of type T[N] because when copy-constructing, it would be // default-initialized and then copied to in some fashion, resulting in two // constructions and one destruction per element. If the type is char[ ], we // placement new into each element, resulting in one construction per element. static const size_t space_size = sizeof(ArrayType); char value_space[space_size]; // on exception will ensure the already-built objects will be destructed // (the value_space is a char array, so it is already trivially-destructible.) 
struct _unwind_class { pointer_type space; int already_built; _unwind_class(pointer_type p) : space(p), already_built(0) {} ~_unwind_class() { if(space) { for(size_t i = already_built; i > 0 ; --i ) space[i-1].~value_type(); (void) new(space) Wrapper(default_constructed()); } } }; public: const ArrayType &value() const { char *vp = const_cast(value_space); return reinterpret_cast(*vp); } private: Wrapper(); public: // have to explicitly construct because other decays to a const value_type* explicit Wrapper(const ArrayType& other) { _unwind_class guard((pointer_type)value_space); pointer_type vp = reinterpret_cast(&value_space); for(size_t i = 0; i < N; ++i ) { (void) new(vp++) value_type(other[i]); ++(guard.already_built); } guard.space = NULL; } explicit Wrapper(const Wrapper& other) : WrapperBase() { // we have to do the heavy lifting to copy contents _unwind_class guard((pointer_type)value_space); pointer_type dp = reinterpret_cast(value_space); pointer_type sp = reinterpret_cast(const_cast(other.value_space)); for(size_t i = 0; i < N; ++i, ++dp, ++sp) { (void) new(dp) value_type(*sp); ++(guard.already_built); } guard.space = NULL; } void CopyTo(void* newSpace) const override { (void) new(newSpace) Wrapper(*this); // exceptions handled in copy constructor } ~Wrapper() { // have to destroy explicitly in reverse order pointer_type vp = reinterpret_cast(&value_space); for(size_t i = N; i > 0 ; --i ) vp[i-1].~value_type(); } }; // given a tuple, return the type of the element that has the maximum alignment requirement. // Given a tuple and that type, return the number of elements of the object with the max // alignment requirement that is at least as big as the largest object in the tuple. 
template struct pick_one; template struct pick_one { typedef T1 type; }; template struct pick_one { typedef T2 type; }; template< template class Selector, typename T1, typename T2 > struct pick_max { typedef typename pick_one< (Selector::value > Selector::value), T1, T2 >::type type; }; template struct size_of { static const int value = sizeof(T); }; template< size_t N, class Tuple, template class Selector > struct pick_tuple_max { typedef typename pick_tuple_max::type LeftMaxType; typedef typename std::tuple_element::type ThisType; typedef typename pick_max::type type; }; template< class Tuple, template class Selector > struct pick_tuple_max<0, Tuple, Selector> { typedef typename std::tuple_element<0, Tuple>::type type; }; // is the specified type included in a tuple? template struct is_element_of { typedef typename std::tuple_element::type T_i; static const bool value = std::is_same::value || is_element_of::value; }; template struct is_element_of { typedef typename std::tuple_element<0, Tuple>::type T_i; static const bool value = std::is_same::value; }; // allow the construction of types that are listed tuple. If a disallowed type // construction is written, a method involving this type is created. The // type has no definition, so a syntax error is generated. template struct ERROR_Type_Not_allowed_In_Tagged_Msg_Not_Member_Of_Tuple; template struct do_if; template struct do_if { static void construct(void *mySpace, const T& x) { (void) new(mySpace) Wrapper(x); } }; template struct do_if { static void construct(void * /*mySpace*/, const T& x) { // This method is instantiated when the type T does not match any of the // element types in the Tuple in variant. ERROR_Type_Not_allowed_In_Tagged_Msg_Not_Member_Of_Tuple::bad_type(x); } }; // Tuple tells us the allowed types that variant can hold. It determines the alignment of the space in // Wrapper, and how big Wrapper is. 
// // the object can only be tested for type, and a read-only reference can be fetched by cast_to(). using tbb::detail::punned_cast; struct tagged_null_type {}; template class tagged_msg { typedef std::tuple= 6 , T5 #endif #if __TBB_VARIADIC_MAX >= 7 , T6 #endif #if __TBB_VARIADIC_MAX >= 8 , T7 #endif #if __TBB_VARIADIC_MAX >= 9 , T8 #endif #if __TBB_VARIADIC_MAX >= 10 , T9 #endif > Tuple; private: class variant { static const size_t N = std::tuple_size::value; typedef typename pick_tuple_max::type AlignType; typedef typename pick_tuple_max::type MaxSizeType; static const size_t MaxNBytes = (sizeof(Wrapper)+sizeof(AlignType)-1); static const size_t MaxNElements = MaxNBytes/sizeof(AlignType); typedef aligned_space SpaceType; SpaceType my_space; static const size_t MaxSize = sizeof(SpaceType); public: variant() { (void) new(&my_space) Wrapper(default_constructed()); } template variant( const T& x ) { do_if::value>::construct(&my_space,x); } variant(const variant& other) { const WrapperBase * h = punned_cast(&(other.my_space)); h->CopyTo(&my_space); } // assignment must destroy and re-create the Wrapper type, as there is no way // to create a Wrapper-to-Wrapper assign even if we find they agree in type. 
void operator=( const variant& rhs ) { if(&rhs != this) { WrapperBase *h = punned_cast(&my_space); h->~WrapperBase(); const WrapperBase *ch = punned_cast(&(rhs.my_space)); ch->CopyTo(&my_space); } } template const U& variant_cast_to() const { const Wrapper *h = dynamic_cast*>(punned_cast(&my_space)); if(!h) { throw_exception(exception_id::bad_tagged_msg_cast); } return h->value(); } template bool variant_is_a() const { return dynamic_cast*>(punned_cast(&my_space)) != NULL; } bool variant_is_default_constructed() const {return variant_is_a();} ~variant() { WrapperBase *h = punned_cast(&my_space); h->~WrapperBase(); } }; //class variant TagType my_tag; variant my_msg; public: tagged_msg(): my_tag(TagType(~0)), my_msg(){} template tagged_msg(T const &index, R const &value) : my_tag(index), my_msg(value) {} template tagged_msg(T const &index, R (&value)[N]) : my_tag(index), my_msg(value) {} void set_tag(TagType const &index) {my_tag = index;} TagType tag() const {return my_tag;} template const V& cast_to() const {return my_msg.template variant_cast_to();} template bool is_a() const {return my_msg.template variant_is_a();} bool is_default_constructed() const {return my_msg.variant_is_default_constructed();} }; //class tagged_msg // template to simplify cast and test for tagged_msg in template contexts template const V& cast_to(T const &t) { return t.template cast_to(); } template bool is_a(T const &t) { return t.template is_a(); } enum op_stat { WAIT = 0, SUCCEEDED, FAILED }; #endif /* __TBB__flow_graph_types_impl_H */ level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/_hash_compare.h000066400000000000000000000106101514453371700330010ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_detail__hash_compare_H #define __TBB_detail__hash_compare_H #include #include "_containers_helpers.h" namespace tbb { namespace detail { namespace d1 { template class hash_compare { using is_transparent_hash = has_transparent_key_equal; public: using hasher = Hash; using key_equal = typename is_transparent_hash::type; hash_compare() = default; hash_compare( hasher hash, key_equal equal ) : my_hasher(hash), my_equal(equal) {} std::size_t operator()( const Key& key ) const { return std::size_t(my_hasher(key)); } bool operator()( const Key& key1, const Key& key2 ) const { return my_equal(key1, key2); } template ::type> std::size_t operator()( const K& key ) const { return std::size_t(my_hasher(key)); } template ::type> bool operator()( const K1& key1, const K2& key2 ) const { return my_equal(key1, key2); } hasher hash_function() const { return my_hasher; } key_equal key_eq() const { return my_equal; } private: hasher my_hasher; key_equal my_equal; }; // class hash_compare //! 
hash_compare that is default argument for concurrent_hash_map template class tbb_hash_compare { public: std::size_t hash( const Key& a ) const { return my_hash_func(a); } #if defined(_MSC_VER) && _MSC_VER <= 1900 #pragma warning (push) // MSVC 2015 throws a strange warning: 'std::size_t': forcing value to bool 'true' or 'false' #pragma warning (disable: 4800) #endif bool equal( const Key& a, const Key& b ) const { return my_key_equal(a, b); } #if defined(_MSC_VER) && _MSC_VER <= 1900 #pragma warning (pop) #endif private: std::hash my_hash_func; std::equal_to my_key_equal; }; } // namespace d1 #if __TBB_CPP20_CONCEPTS_PRESENT inline namespace d0 { template concept hash_compare = std::copy_constructible && requires( const std::remove_reference_t& hc, const Key& key1, const Key& key2 ) { { hc.hash(key1) } -> std::same_as; { hc.equal(key1, key2) } -> std::convertible_to; }; } // namespace d0 #endif // __TBB_CPP20_CONCEPTS_PRESENT } // namespace detail } // namespace tbb #if TBB_DEFINE_STD_HASH_SPECIALIZATIONS namespace std { template struct hash> { public: std::size_t operator()( const std::pair& p ) const { return first_hash(p.first) ^ second_hash(p.second); } private: std::hash first_hash; std::hash second_hash; }; // struct hash // Apple clang and MSVC defines their own specializations for std::hash> #if !(_LIBCPP_VERSION) && !(_CPPLIB_VER) template struct hash> { public: std::size_t operator()( const std::basic_string& s ) const { std::size_t h = 0; for ( const CharT* c = s.c_str(); *c; ++c ) { h = h * hash_multiplier ^ char_hash(*c); } return h; } private: static constexpr std::size_t hash_multiplier = tbb::detail::select_size_t_constant<2654435769U, 11400714819323198485ULL>::value; std::hash char_hash; }; // struct hash #endif // !(_LIBCPP_VERSION || _CPPLIB_VER) } // namespace std #endif // TBB_DEFINE_STD_HASH_SPECIALIZATIONS #endif // __TBB_detail__hash_compare_H 
_intrusive_list_node.h000066400000000000000000000026661514453371700343750ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef _TBB_detail__intrusive_list_node_H #define _TBB_detail__intrusive_list_node_H namespace tbb { namespace detail { namespace d1 { //! Data structure to be inherited by the types that can form intrusive lists. /** Intrusive list is formed by means of the member_intrusive_list template class. Note that type T must derive from intrusive_list_node either publicly or declare instantiation member_intrusive_list as a friend. This class implements a limited subset of std::list interface. **/ struct intrusive_list_node { intrusive_list_node* my_prev_node{}; intrusive_list_node* my_next_node{}; #if TBB_USE_ASSERT intrusive_list_node() { my_prev_node = my_next_node = this; } #endif /* TBB_USE_ASSERT */ }; } // namespace d1 } // namespace detail } // namespace tbb #endif // _TBB_detail__intrusive_list_node_H level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/_machine.h000066400000000000000000000310571514453371700317640ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_detail__machine_H #define __TBB_detail__machine_H #include "_config.h" #include "_assert.h" #include #include #include #include #ifdef _WIN32 #include #ifdef __TBBMALLOC_BUILD #define WIN32_LEAN_AND_MEAN #ifndef NOMINMAX #define NOMINMAX #endif #include // SwitchToThread() #endif #ifdef _MSC_VER #if __TBB_x86_64 || __TBB_x86_32 #pragma intrinsic(__rdtsc) #endif #endif #endif #if __TBB_x86_64 || __TBB_x86_32 #include // _mm_pause #endif #if (_WIN32) #include // _control87 #endif #if __TBB_GLIBCXX_THIS_THREAD_YIELD_BROKEN #include // sched_yield #else #include // std::this_thread::yield() #endif namespace tbb { namespace detail { inline namespace d0 { //-------------------------------------------------------------------------------------------------- // Yield implementation //-------------------------------------------------------------------------------------------------- #if __TBB_GLIBCXX_THIS_THREAD_YIELD_BROKEN static inline void yield() { int err = sched_yield(); __TBB_ASSERT_EX(err == 0, "sched_yield has failed"); } #elif __TBBMALLOC_BUILD && _WIN32 // Use Windows API for yield in tbbmalloc to avoid dependency on C++ runtime with some implementations. 
static inline void yield() { SwitchToThread(); } #else using std::this_thread::yield; #endif //-------------------------------------------------------------------------------------------------- // atomic_fence_seq_cst implementation //-------------------------------------------------------------------------------------------------- static inline void atomic_fence_seq_cst() { #if (__TBB_x86_64 || __TBB_x86_32) && defined(__GNUC__) && __GNUC__ < 11 unsigned char dummy = 0u; __asm__ __volatile__ ("lock; notb %0" : "+m" (dummy) :: "memory"); #else std::atomic_thread_fence(std::memory_order_seq_cst); #endif } //-------------------------------------------------------------------------------------------------- // Pause implementation //-------------------------------------------------------------------------------------------------- static inline void machine_pause(int32_t delay) { #if __TBB_x86_64 || __TBB_x86_32 while (delay-- > 0) { _mm_pause(); } #elif __ARM_ARCH_7A__ || __aarch64__ while (delay-- > 0) { __asm__ __volatile__("yield" ::: "memory"); } #else /* Generic */ (void)delay; // suppress without including _template_helpers.h yield(); #endif } //////////////////////////////////////////////////////////////////////////////////////////////////// // tbb::detail::log2() implementation //////////////////////////////////////////////////////////////////////////////////////////////////// // TODO: Use log2p1() function that will be available in C++20 standard #if defined(__GNUC__) || defined(__clang__) namespace gnu_builtins { inline uintptr_t clz(unsigned int x) { return __builtin_clz(x); } inline uintptr_t clz(unsigned long int x) { return __builtin_clzl(x); } inline uintptr_t clz(unsigned long long int x) { return __builtin_clzll(x); } } #elif defined(_MSC_VER) #pragma intrinsic(__TBB_W(_BitScanReverse)) namespace msvc_intrinsics { static inline uintptr_t bit_scan_reverse(uintptr_t i) { unsigned long j; __TBB_W(_BitScanReverse)( &j, i ); return j; } } #endif template 
constexpr std::uintptr_t number_of_bits() { return sizeof(T) * CHAR_BIT; } // logarithm is the index of the most significant non-zero bit static inline uintptr_t machine_log2(uintptr_t x) { #if defined(__GNUC__) || defined(__clang__) // If P is a power of 2 and x() - 1) ^ gnu_builtins::clz(x); #elif defined(_MSC_VER) return msvc_intrinsics::bit_scan_reverse(x); #elif __i386__ || __i386 /*for Sun OS*/ || __MINGW32__ uintptr_t j, i = x; __asm__("bsr %1,%0" : "=r"(j) : "r"(i)); return j; #elif __powerpc__ || __POWERPC__ #if __TBB_WORDSIZE==8 __asm__ __volatile__ ("cntlzd %0,%0" : "+r"(x)); return 63 - static_cast(x); #else __asm__ __volatile__ ("cntlzw %0,%0" : "+r"(x)); return 31 - static_cast(x); #endif /*__TBB_WORDSIZE*/ #elif __sparc uint64_t count; // one hot encode x |= (x >> 1); x |= (x >> 2); x |= (x >> 4); x |= (x >> 8); x |= (x >> 16); x |= (x >> 32); // count 1's __asm__ ("popc %1, %0" : "=r"(count) : "r"(x) ); return count - 1; #else intptr_t result = 0; if( sizeof(x) > 4 && (uintptr_t tmp = x >> 32) ) { x = tmp; result += 32; } if( uintptr_t tmp = x >> 16 ) { x = tmp; result += 16; } if( uintptr_t tmp = x >> 8 ) { x = tmp; result += 8; } if( uintptr_t tmp = x >> 4 ) { x = tmp; result += 4; } if( uintptr_t tmp = x >> 2 ) { x = tmp; result += 2; } return (x & 2) ? 
result + 1 : result; #endif } //////////////////////////////////////////////////////////////////////////////////////////////////// // tbb::detail::reverse_bits() implementation //////////////////////////////////////////////////////////////////////////////////////////////////// #if TBB_USE_CLANG_BITREVERSE_BUILTINS namespace llvm_builtins { inline uint8_t builtin_bitreverse(uint8_t x) { return __builtin_bitreverse8 (x); } inline uint16_t builtin_bitreverse(uint16_t x) { return __builtin_bitreverse16(x); } inline uint32_t builtin_bitreverse(uint32_t x) { return __builtin_bitreverse32(x); } inline uint64_t builtin_bitreverse(uint64_t x) { return __builtin_bitreverse64(x); } } #else // generic template struct reverse { static const T byte_table[256]; }; template const T reverse::byte_table[256] = { 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0, 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA, 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6, 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, 0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1, 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 
0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF }; inline unsigned char reverse_byte(unsigned char src) { return reverse::byte_table[src]; } #endif // TBB_USE_CLANG_BITREVERSE_BUILTINS template T machine_reverse_bits(T src) { #if TBB_USE_CLANG_BITREVERSE_BUILTINS return builtin_bitreverse(fixed_width_cast(src)); #else /* Generic */ T dst; unsigned char *original = (unsigned char *) &src; unsigned char *reversed = (unsigned char *) &dst; for ( int i = sizeof(T) - 1; i >= 0; i-- ) { reversed[i] = reverse_byte( original[sizeof(T) - i - 1] ); } return dst; #endif // TBB_USE_CLANG_BITREVERSE_BUILTINS } } // inline namespace d0 namespace d1 { #if (_WIN32) // API to retrieve/update FPU control setting #define __TBB_CPU_CTL_ENV_PRESENT 1 struct cpu_ctl_env { unsigned int x87cw{}; #if (__TBB_x86_64) // Changing the infinity mode or the floating-point precision is not supported on x64. // The attempt causes an assertion. 
See // https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/control87-controlfp-control87-2 static constexpr unsigned int X87CW_CONTROL_MASK = _MCW_DN | _MCW_EM | _MCW_RC; #else static constexpr unsigned int X87CW_CONTROL_MASK = ~0U; #endif #if (__TBB_x86_32 || __TBB_x86_64) unsigned int mxcsr{}; static constexpr unsigned int MXCSR_CONTROL_MASK = ~0x3fu; /* all except last six status bits */ #endif bool operator!=( const cpu_ctl_env& ctl ) const { return #if (__TBB_x86_32 || __TBB_x86_64) mxcsr != ctl.mxcsr || #endif x87cw != ctl.x87cw; } void get_env() { x87cw = _control87(0, 0); #if (__TBB_x86_32 || __TBB_x86_64) mxcsr = _mm_getcsr(); #endif } void set_env() const { _control87(x87cw, X87CW_CONTROL_MASK); #if (__TBB_x86_32 || __TBB_x86_64) _mm_setcsr(mxcsr & MXCSR_CONTROL_MASK); #endif } }; #elif (__TBB_x86_32 || __TBB_x86_64) // API to retrieve/update FPU control setting #define __TBB_CPU_CTL_ENV_PRESENT 1 struct cpu_ctl_env { int mxcsr{}; short x87cw{}; static const int MXCSR_CONTROL_MASK = ~0x3f; /* all except last six status bits */ bool operator!=(const cpu_ctl_env& ctl) const { return mxcsr != ctl.mxcsr || x87cw != ctl.x87cw; } void get_env() { __asm__ __volatile__( "stmxcsr %0\n\t" "fstcw %1" : "=m"(mxcsr), "=m"(x87cw) ); mxcsr &= MXCSR_CONTROL_MASK; } void set_env() const { __asm__ __volatile__( "ldmxcsr %0\n\t" "fldcw %1" : : "m"(mxcsr), "m"(x87cw) ); } }; #endif } // namespace d1 } // namespace detail } // namespace tbb #if !__TBB_CPU_CTL_ENV_PRESENT #include #include namespace tbb { namespace detail { namespace r1 { void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size); void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p); } // namespace r1 namespace d1 { class cpu_ctl_env { fenv_t *my_fenv_ptr; public: cpu_ctl_env() : my_fenv_ptr(NULL) {} ~cpu_ctl_env() { if ( my_fenv_ptr ) r1::cache_aligned_deallocate( (void*)my_fenv_ptr ); } // It is possible not to copy memory but just to copy pointers but the following issues should 
be addressed: // 1. The arena lifetime and the context lifetime are independent; // 2. The user is allowed to recapture different FPU settings to context so 'current FPU settings' inside // dispatch loop may become invalid. // But do we really want to improve the fenv implementation? It seems to be better to replace the fenv implementation // with a platform specific implementation. cpu_ctl_env( const cpu_ctl_env &src ) : my_fenv_ptr(NULL) { *this = src; } cpu_ctl_env& operator=( const cpu_ctl_env &src ) { __TBB_ASSERT( src.my_fenv_ptr, NULL ); if ( !my_fenv_ptr ) my_fenv_ptr = (fenv_t*)r1::cache_aligned_allocate(sizeof(fenv_t)); *my_fenv_ptr = *src.my_fenv_ptr; return *this; } bool operator!=( const cpu_ctl_env &ctl ) const { __TBB_ASSERT( my_fenv_ptr, "cpu_ctl_env is not initialized." ); __TBB_ASSERT( ctl.my_fenv_ptr, "cpu_ctl_env is not initialized." ); return std::memcmp( (void*)my_fenv_ptr, (void*)ctl.my_fenv_ptr, sizeof(fenv_t) ); } void get_env () { if ( !my_fenv_ptr ) my_fenv_ptr = (fenv_t*)r1::cache_aligned_allocate(sizeof(fenv_t)); fegetenv( my_fenv_ptr ); } const cpu_ctl_env& set_env () const { __TBB_ASSERT( my_fenv_ptr, "cpu_ctl_env is not initialized." ); fesetenv( my_fenv_ptr ); return *this; } }; } // namespace d1 } // namespace detail } // namespace tbb #endif /* !__TBB_CPU_CTL_ENV_PRESENT */ #endif // __TBB_detail__machine_H level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/_mutex_common.h000066400000000000000000000044531514453371700330720ustar00rootroot00000000000000/* Copyright (c) 2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_detail__mutex_common_H #define __TBB_detail__mutex_common_H #include "_config.h" #include "_utils.h" #if __TBB_CPP20_CONCEPTS_PRESENT #include namespace tbb { namespace detail { inline namespace d0 { template concept mutex_scoped_lock = std::default_initializable && std::constructible_from && requires( Lock& lock, Mutex& mutex ) { lock.acquire(mutex); { lock.try_acquire(mutex) } -> adaptive_same_as; lock.release(); }; template concept rw_mutex_scoped_lock = mutex_scoped_lock && std::constructible_from && requires( Lock& lock, Mutex& mutex ) { lock.acquire(mutex, false); { lock.try_acquire(mutex, false) } -> adaptive_same_as; { lock.upgrade_to_writer() } -> adaptive_same_as; { lock.downgrade_to_reader() } -> adaptive_same_as; }; template concept scoped_lockable = mutex_scoped_lock; template concept rw_scoped_lockable = scoped_lockable && rw_mutex_scoped_lock; } // namespace d0 } // namespace detail } // namespace tbb #endif // __TBB_CPP20_CONCEPTS_PRESENT #endif // __TBB_detail__mutex_common_H _namespace_injection.h000066400000000000000000000014571514453371700343000ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/* Copyright (c) 2020-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ // All public entities of the OneAPI Spec are available under oneapi namespace // Define tbb namespace first as it might not be known yet namespace tbb {} namespace oneapi { namespace tbb = ::tbb; } level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/_node_handle.h000066400000000000000000000120651514453371700326160ustar00rootroot00000000000000/* Copyright (c) 2019-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_detail__node_handle_H #define __TBB_detail__node_handle_H #include "_allocator_traits.h" #include "_assert.h" namespace tbb { namespace detail { namespace d1 { // A structure to access private node handle methods in internal TBB classes // Regular friend declaration is not convenient because classes which use node handle // can be placed in the different versioning namespaces. 
struct node_handle_accessor { template static typename NodeHandleType::node* get_node_ptr( NodeHandleType& nh ) { return nh.get_node_ptr(); } template static NodeHandleType construct( typename NodeHandleType::node* node_ptr ) { return NodeHandleType{node_ptr}; } template static void deactivate( NodeHandleType& nh ) { nh.deactivate(); } }; // struct node_handle_accessor template class node_handle_base { public: using allocator_type = Allocator; protected: using node = Node; using allocator_traits_type = tbb::detail::allocator_traits; public: node_handle_base() : my_node(nullptr), my_allocator() {} node_handle_base(node_handle_base&& nh) : my_node(nh.my_node), my_allocator(std::move(nh.my_allocator)) { nh.my_node = nullptr; } __TBB_nodiscard bool empty() const { return my_node == nullptr; } explicit operator bool() const { return my_node != nullptr; } ~node_handle_base() { internal_destroy(); } node_handle_base& operator=( node_handle_base&& nh ) { internal_destroy(); my_node = nh.my_node; move_assign_allocators(my_allocator, nh.my_allocator); nh.deactivate(); return *this; } void swap( node_handle_base& nh ) { using std::swap; swap(my_node, nh.my_node); swap_allocators(my_allocator, nh.my_allocator); } allocator_type get_allocator() const { return my_allocator; } protected: node_handle_base( node* n ) : my_node(n) {} void internal_destroy() { if(my_node != nullptr) { allocator_traits_type::destroy(my_allocator, my_node->storage()); typename allocator_traits_type::template rebind_alloc node_allocator(my_allocator); node_allocator.deallocate(my_node, 1); } } node* get_node_ptr() { return my_node; } void deactivate() { my_node = nullptr; } node* my_node; allocator_type my_allocator; }; // node handle for maps template class node_handle : public node_handle_base { using base_type = node_handle_base; public: using key_type = Key; using mapped_type = typename Value::second_type; using allocator_type = typename base_type::allocator_type; node_handle() = default; key_type& 
key() const { __TBB_ASSERT(!this->empty(), "Cannot get key from the empty node_type object"); return *const_cast(&(this->my_node->value().first)); } mapped_type& mapped() const { __TBB_ASSERT(!this->empty(), "Cannot get mapped value from the empty node_type object"); return this->my_node->value().second; } private: friend struct node_handle_accessor; node_handle( typename base_type::node* n ) : base_type(n) {} }; // class node_handle // node handle for sets template class node_handle : public node_handle_base { using base_type = node_handle_base; public: using value_type = Key; using allocator_type = typename base_type::allocator_type; node_handle() = default; value_type& value() const { __TBB_ASSERT(!this->empty(), "Cannot get value from the empty node_type object"); return *const_cast(&(this->my_node->value())); } private: friend struct node_handle_accessor; node_handle( typename base_type::node* n ) : base_type(n) {} }; // class node_handle template void swap( node_handle& lhs, node_handle& rhs ) { return lhs.swap(rhs); } } // namespace d1 } // namespace detail } // namespace tbb #endif // __TBB_detail__node_handle_H _pipeline_filters.h000066400000000000000000000370411514453371700336350ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #ifndef __TBB_parallel_filters_H #define __TBB_parallel_filters_H #include "_config.h" #include "_task.h" #include "_pipeline_filters_deduction.h" #include "../tbb_allocator.h" #include #include namespace tbb { namespace detail { namespace d1 { class base_filter; } namespace r1 { TBB_EXPORT void __TBB_EXPORTED_FUNC set_end_of_input(d1::base_filter&); class pipeline; class stage_task; class input_buffer; } namespace d1 { class filter_node; //! A stage in a pipeline. /** @ingroup algorithms */ class base_filter{ private: //! Value used to mark "not in pipeline" static base_filter* not_in_pipeline() { return reinterpret_cast(std::intptr_t(-1)); } public: //! The lowest bit 0 is for parallel vs serial static constexpr unsigned int filter_is_serial = 0x1; //! 2nd bit distinguishes ordered vs unordered filters. static constexpr unsigned int filter_is_out_of_order = 0x1<<1; //! 3rd bit marks input filters emitting small objects static constexpr unsigned int filter_may_emit_null = 0x1<<2; base_filter(const base_filter&) = delete; base_filter& operator=(const base_filter&) = delete; protected: explicit base_filter( unsigned int m ) : next_filter_in_pipeline(not_in_pipeline()), my_input_buffer(nullptr), my_filter_mode(m), my_pipeline(nullptr) {} // signal end-of-input for concrete_filters void set_end_of_input() { r1::set_end_of_input(*this); } public: //! True if filter is serial. bool is_serial() const { return bool( my_filter_mode & filter_is_serial ); } //! True if filter must receive stream in order. bool is_ordered() const { return (my_filter_mode & filter_is_serial) && !(my_filter_mode & filter_is_out_of_order); } //! true if an input filter can emit null bool object_may_be_null() { return ( my_filter_mode & filter_may_emit_null ) == filter_may_emit_null; } //! Operate on an item from the input stream, and return item for output stream. /** Returns nullptr if filter is a sink. */ virtual void* operator()( void* item ) = 0; //! Destroy filter. 
virtual ~base_filter() {}; //! Destroys item if pipeline was cancelled. /** Required to prevent memory leaks. Note it can be called concurrently even for serial filters.*/ virtual void finalize( void* /*item*/ ) {} private: //! Pointer to next filter in the pipeline. base_filter* next_filter_in_pipeline; //! Buffer for incoming tokens, or nullptr if not required. /** The buffer is required if the filter is serial. */ r1::input_buffer* my_input_buffer; friend class r1::stage_task; friend class r1::pipeline; friend void r1::set_end_of_input(d1::base_filter&); //! Storage for filter mode and dynamically checked implementation version. const unsigned int my_filter_mode; //! Pointer to the pipeline. r1::pipeline* my_pipeline; }; template class concrete_filter; //! input_filter control to signal end-of-input for parallel_pipeline class flow_control { bool is_pipeline_stopped = false; flow_control() = default; template friend class concrete_filter; template __TBB_requires(std::copyable) friend class input_node; public: void stop() { is_pipeline_stopped = true; } }; // Emulate std::is_trivially_copyable (false positives not allowed, false negatives suboptimal but safe). 
#if __TBB_CPP11_TYPE_PROPERTIES_PRESENT template using tbb_trivially_copyable = std::is_trivially_copyable; #else template struct tbb_trivially_copyable { enum { value = false }; }; template struct tbb_trivially_copyable < T* > { enum { value = true }; }; template<> struct tbb_trivially_copyable < bool > { enum { value = true }; }; template<> struct tbb_trivially_copyable < char > { enum { value = true }; }; template<> struct tbb_trivially_copyable < signed char > { enum { value = true }; }; template<> struct tbb_trivially_copyable { enum { value = true }; }; template<> struct tbb_trivially_copyable < short > { enum { value = true }; }; template<> struct tbb_trivially_copyable { enum { value = true }; }; template<> struct tbb_trivially_copyable < int > { enum { value = true }; }; template<> struct tbb_trivially_copyable { enum { value = true }; }; template<> struct tbb_trivially_copyable < long > { enum { value = true }; }; template<> struct tbb_trivially_copyable { enum { value = true }; }; template<> struct tbb_trivially_copyable < long long> { enum { value = true }; }; template<> struct tbb_trivially_copyable { enum { value = true }; }; template<> struct tbb_trivially_copyable < float > { enum { value = true }; }; template<> struct tbb_trivially_copyable < double > { enum { value = true }; }; template<> struct tbb_trivially_copyable < long double > { enum { value = true }; }; #endif // __TBB_CPP11_TYPE_PROPERTIES_PRESENT template struct use_allocator { static constexpr bool value = sizeof(T) > sizeof(void *) || !tbb_trivially_copyable::value; }; // A helper class to customize how a type is passed between filters. 
// Usage: token_helper::value> template struct token_helper; // using tbb_allocator template struct token_helper { using pointer = T*; using value_type = T; static pointer create_token(value_type && source) { return new (r1::allocate_memory(sizeof(T))) T(std::move(source)); } static value_type & token(pointer & t) { return *t; } static void * cast_to_void_ptr(pointer ref) { return reinterpret_cast(ref); } static pointer cast_from_void_ptr(void * ref) { return reinterpret_cast(ref); } static void destroy_token(pointer token) { token->~value_type(); r1::deallocate_memory(token); } }; // pointer specialization template struct token_helper { using pointer = T*; using value_type = T*; static pointer create_token(const value_type & source) { return source; } static value_type & token(pointer & t) { return t; } static void * cast_to_void_ptr(pointer ref) { return reinterpret_cast(ref); } static pointer cast_from_void_ptr(void * ref) { return reinterpret_cast(ref); } static void destroy_token( pointer /*token*/) {} }; // converting type to and from void*, passing objects directly template struct token_helper { typedef union { T actual_value; void * void_overlay; } type_to_void_ptr_map; using pointer = T; // not really a pointer in this case. 
using value_type = T; static pointer create_token(const value_type & source) { return source; } static value_type & token(pointer & t) { return t; } static void * cast_to_void_ptr(pointer ref) { type_to_void_ptr_map mymap; mymap.void_overlay = nullptr; mymap.actual_value = ref; return mymap.void_overlay; } static pointer cast_from_void_ptr(void * ref) { type_to_void_ptr_map mymap; mymap.void_overlay = ref; return mymap.actual_value; } static void destroy_token( pointer /*token*/) {} }; // intermediate template class concrete_filter: public base_filter { const Body& my_body; using input_helper = token_helper::value>; using input_pointer = typename input_helper::pointer; using output_helper = token_helper::value>; using output_pointer = typename output_helper::pointer; void* operator()(void* input) override { input_pointer temp_input = input_helper::cast_from_void_ptr(input); output_pointer temp_output = output_helper::create_token(my_body(std::move(input_helper::token(temp_input)))); input_helper::destroy_token(temp_input); return output_helper::cast_to_void_ptr(temp_output); } void finalize(void * input) override { input_pointer temp_input = input_helper::cast_from_void_ptr(input); input_helper::destroy_token(temp_input); } public: concrete_filter(unsigned int m, const Body& body) : base_filter(m), my_body(body) {} }; // input template class concrete_filter: public base_filter { const Body& my_body; using output_helper = token_helper::value>; using output_pointer = typename output_helper::pointer; void* operator()(void*) override { flow_control control; output_pointer temp_output = output_helper::create_token(my_body(control)); if(control.is_pipeline_stopped) { output_helper::destroy_token(temp_output); set_end_of_input(); return nullptr; } return output_helper::cast_to_void_ptr(temp_output); } public: concrete_filter(unsigned int m, const Body& body) : base_filter(m | filter_may_emit_null), my_body(body) {} }; // output template class concrete_filter: public 
base_filter { const Body& my_body; using input_helper = token_helper::value>; using input_pointer = typename input_helper::pointer; void* operator()(void* input) override { input_pointer temp_input = input_helper::cast_from_void_ptr(input); my_body(std::move(input_helper::token(temp_input))); input_helper::destroy_token(temp_input); return nullptr; } void finalize(void* input) override { input_pointer temp_input = input_helper::cast_from_void_ptr(input); input_helper::destroy_token(temp_input); } public: concrete_filter(unsigned int m, const Body& body) : base_filter(m), my_body(body) {} }; template class concrete_filter: public base_filter { const Body& my_body; void* operator()(void*) override { flow_control control; my_body(control); void* output = control.is_pipeline_stopped ? nullptr : (void*)(std::intptr_t)-1; return output; } public: concrete_filter(unsigned int m, const Body& body) : base_filter(m), my_body(body) {} }; class filter_node_ptr { filter_node * my_node; public: filter_node_ptr() : my_node(nullptr) {} filter_node_ptr(filter_node *); ~filter_node_ptr(); filter_node_ptr(const filter_node_ptr &); filter_node_ptr(filter_node_ptr &&); void operator=(filter_node *); void operator=(const filter_node_ptr &); void operator=(filter_node_ptr &&); filter_node& operator*() const; operator bool() const; }; //! Abstract base class that represents a node in a parse tree underlying a filter class. /** These nodes are always heap-allocated and can be shared by filter objects. */ class filter_node { /** Count must be atomic because it is hidden state for user, but might be shared by threads. 
*/ std::atomic ref_count; public: filter_node_ptr left; filter_node_ptr right; protected: filter_node() : ref_count(0), left(nullptr), right(nullptr) { #ifdef __TBB_TEST_FILTER_NODE_COUNT ++(__TBB_TEST_FILTER_NODE_COUNT); #endif } public: filter_node(const filter_node_ptr& x, const filter_node_ptr& y) : filter_node(){ left = x; right = y; } filter_node(const filter_node&) = delete; filter_node& operator=(const filter_node&) = delete; //! Add concrete_filter to pipeline virtual base_filter* create_filter() const { __TBB_ASSERT(false, "method of non-leaf was called"); return nullptr; } //! Increment reference count void add_ref() { ref_count.fetch_add(1, std::memory_order_relaxed); } //! Decrement reference count and delete if it becomes zero. void remove_ref() { __TBB_ASSERT(ref_count>0,"ref_count underflow"); if( ref_count.fetch_sub(1, std::memory_order_relaxed) == 1 ) { this->~filter_node(); r1::deallocate_memory(this); } } virtual ~filter_node() { #ifdef __TBB_TEST_FILTER_NODE_COUNT --(__TBB_TEST_FILTER_NODE_COUNT); #endif } }; inline filter_node_ptr::filter_node_ptr(filter_node * nd) : my_node(nd) { if (my_node) { my_node->add_ref(); } } inline filter_node_ptr::~filter_node_ptr() { if (my_node) { my_node->remove_ref(); } } inline filter_node_ptr::filter_node_ptr(const filter_node_ptr & rhs) : my_node(rhs.my_node) { if (my_node) { my_node->add_ref(); } } inline filter_node_ptr::filter_node_ptr(filter_node_ptr && rhs) : my_node(rhs.my_node) { rhs.my_node = nullptr; } inline void filter_node_ptr::operator=(filter_node * rhs) { // Order of operations below carefully chosen so that reference counts remain correct // in unlikely event that remove_ref throws exception. 
filter_node* old = my_node; my_node = rhs; if (my_node) { my_node->add_ref(); } if (old) { old->remove_ref(); } } inline void filter_node_ptr::operator=(const filter_node_ptr & rhs) { *this = rhs.my_node; } inline void filter_node_ptr::operator=(filter_node_ptr && rhs) { filter_node* old = my_node; my_node = rhs.my_node; rhs.my_node = nullptr; if (old) { old->remove_ref(); } } inline filter_node& filter_node_ptr::operator*() const{ __TBB_ASSERT(my_node,"NULL node is used"); return *my_node; } inline filter_node_ptr::operator bool() const { return my_node != nullptr; } //! Node in parse tree representing result of make_filter. template class filter_node_leaf: public filter_node { const unsigned int my_mode; const Body my_body; base_filter* create_filter() const override { return new(r1::allocate_memory(sizeof(concrete_filter))) concrete_filter(my_mode,my_body); } public: filter_node_leaf( unsigned int m, const Body& b ) : my_mode(m), my_body(b) {} }; template ::input_type> using filter_input = typename std::conditional::value, void, Input>::type; template using filter_output = typename body_types::output_type; } // namespace d1 } // namespace detail } // namespace tbb #endif /* __TBB_parallel_filters_H */ _pipeline_filters_deduction.h000066400000000000000000000027711514453371700356750ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #ifndef __TBB__pipeline_filters_deduction_H #define __TBB__pipeline_filters_deduction_H #include "_config.h" #include #include namespace tbb { namespace detail { namespace d1 { template struct declare_fitler_types { using input_type = typename std::remove_const::type>::type; using output_type = typename std::remove_const::type>::type; }; template struct body_types; template struct body_types : declare_fitler_types {}; template struct body_types : declare_fitler_types {}; } // namespace d1 } // namespace detail } // namespace tbb #endif // __TBB__pipeline_filters_deduction_H level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/_range_common.h000066400000000000000000000110431514453371700330150ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_detail__range_common_H #define __TBB_detail__range_common_H #include "_config.h" #include "_utils.h" #if __TBB_CPP20_CONCEPTS_PRESENT #include #endif #include namespace tbb { namespace detail { inline namespace d0 { //! Dummy type that distinguishes splitting constructor from copy constructor. /** * See description of parallel_for and parallel_reduce for example usages. * @ingroup algorithms */ class split {}; //! Type enables transmission of splitting proportion from partitioners to range objects /** * In order to make use of such facility Range objects must implement * splitting constructor with this type passed. 
*/ class proportional_split : no_assign { public: proportional_split(size_t _left = 1, size_t _right = 1) : my_left(_left), my_right(_right) { } size_t left() const { return my_left; } size_t right() const { return my_right; } // used when range does not support proportional split explicit operator split() const { return split(); } private: size_t my_left, my_right; }; template struct range_split_object_provider { template static split get( PartitionerSplitType& ) { return split(); } }; template struct range_split_object_provider::value>::type> { template static PartitionerSplitType& get( PartitionerSplitType& split_obj ) { return split_obj; } }; template auto get_range_split_object( PartitionerSplitType& split_obj ) -> decltype(range_split_object_provider::get(split_obj)) { return range_split_object_provider::get(split_obj); } template using range_iterator_type = decltype(std::begin(std::declval())); #if __TBB_CPP20_CONCEPTS_PRESENT template using iterator_reference_type = typename std::iterator_traits::reference; template using range_reference_type = iterator_reference_type>; template concept blocked_range_value = std::copyable && requires( const std::remove_reference_t& lhs, const std::remove_reference_t& rhs ) { { lhs < rhs } -> relaxed_convertible_to; { lhs - rhs } -> std::convertible_to; { lhs + (rhs - lhs) } -> std::convertible_to; }; template concept splittable = std::constructible_from; template concept tbb_range = std::copy_constructible && splittable && requires( const std::remove_reference_t& range ) { { range.empty() } -> relaxed_convertible_to; { range.is_divisible() } -> relaxed_convertible_to; }; template constexpr bool iterator_concept_helper( std::input_iterator_tag ) { return std::input_iterator; } template constexpr bool iterator_concept_helper( std::random_access_iterator_tag ) { return std::random_access_iterator; } template concept iterator_satisfies = requires (IteratorTag tag) { requires iterator_concept_helper(tag); }; template concept 
container_based_sequence = requires( Sequence& seq ) { { std::begin(seq) } -> iterator_satisfies; { std::end(seq) } -> iterator_satisfies; }; #endif // __TBB_CPP20_CONCEPTS_PRESENT } // namespace d0 } // namespace detail } // namespace tbb #endif // __TBB_detail__range_common_H level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/_rtm_mutex.h000066400000000000000000000112611514453371700323770ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB__rtm_mutex_impl_H #define __TBB__rtm_mutex_impl_H #include "_assert.h" #include "_utils.h" #include "../spin_mutex.h" #include "../profiling.h" namespace tbb { namespace detail { namespace r1 { struct rtm_mutex_impl; } namespace d1 { #if _MSC_VER && !defined(__INTEL_COMPILER) // Suppress warning: structure was padded due to alignment specifier #pragma warning (push) #pragma warning (disable: 4324) #endif /** A rtm_mutex is an speculation-enabled spin mutex. It should be used for locking short critical sections where the lock is contended but the data it protects are not. If zero-initialized, the mutex is considered unheld. @ingroup synchronization */ class alignas(max_nfs_size) rtm_mutex : private spin_mutex { private: enum class rtm_state { rtm_none, rtm_transacting, rtm_real }; public: //! Constructors rtm_mutex() noexcept { create_itt_sync(this, "tbb::speculative_spin_mutex", ""); } //! Destructor ~rtm_mutex() = default; //! 
Represents acquisition of a mutex. class scoped_lock { public: friend class rtm_mutex; //! Construct lock that has not acquired a mutex. /** Equivalent to zero-initialization of *this. */ constexpr scoped_lock() : m_mutex(nullptr), m_transaction_state(rtm_state::rtm_none) {} //! Acquire lock on given mutex. scoped_lock(rtm_mutex& m) : m_mutex(nullptr), m_transaction_state(rtm_state::rtm_none) { acquire(m); } //! Release lock (if lock is held). ~scoped_lock() { if(m_transaction_state != rtm_state::rtm_none) { release(); } } //! No Copy scoped_lock(const scoped_lock&) = delete; scoped_lock& operator=(const scoped_lock&) = delete; //! Acquire lock on given mutex. void acquire(rtm_mutex& m); //! Try acquire lock on given mutex. bool try_acquire(rtm_mutex& m); //! Release lock void release(); private: rtm_mutex* m_mutex; rtm_state m_transaction_state; friend r1::rtm_mutex_impl; }; //! Mutex traits static constexpr bool is_rw_mutex = false; static constexpr bool is_recursive_mutex = false; static constexpr bool is_fair_mutex = false; private: friend r1::rtm_mutex_impl; }; // end of rtm_mutex } // namespace d1 namespace r1 { //! Internal acquire lock. // only_speculate == true if we're doing a try_lock, else false. TBB_EXPORT void __TBB_EXPORTED_FUNC acquire(d1::rtm_mutex&, d1::rtm_mutex::scoped_lock&, bool only_speculate = false); //! Internal try_acquire lock. TBB_EXPORT bool __TBB_EXPORTED_FUNC try_acquire(d1::rtm_mutex&, d1::rtm_mutex::scoped_lock&); //! Internal release lock. TBB_EXPORT void __TBB_EXPORTED_FUNC release(d1::rtm_mutex::scoped_lock&); } // namespace r1 namespace d1 { //! Acquire lock on given mutex. inline void rtm_mutex::scoped_lock::acquire(rtm_mutex& m) { __TBB_ASSERT(!m_mutex, "lock is already acquired"); r1::acquire(m, *this); } //! Try acquire lock on given mutex. inline bool rtm_mutex::scoped_lock::try_acquire(rtm_mutex& m) { __TBB_ASSERT(!m_mutex, "lock is already acquired"); return r1::try_acquire(m, *this); } //! 
Release lock inline void rtm_mutex::scoped_lock::release() { __TBB_ASSERT(m_mutex, "lock is not acquired"); __TBB_ASSERT(m_transaction_state != rtm_state::rtm_none, "lock is not acquired"); return r1::release(*this); } #if _MSC_VER && !defined(__INTEL_COMPILER) #pragma warning (pop) // 4324 warning #endif #if TBB_USE_PROFILING_TOOLS inline void set_name(rtm_mutex& obj, const char* name) { itt_set_sync_name(&obj, name); } #if (_WIN32||_WIN64) inline void set_name(rtm_mutex& obj, const wchar_t* name) { itt_set_sync_name(&obj, name); } #endif // WIN #else inline void set_name(rtm_mutex&, const char*) {} #if (_WIN32||_WIN64) inline void set_name(rtm_mutex&, const wchar_t*) {} #endif // WIN #endif } // namespace d1 } // namespace detail } // namespace tbb #endif /* __TBB__rtm_mutex_impl_H */ level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/_rtm_rw_mutex.h000066400000000000000000000160521514453371700331120ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #ifndef __TBB_detail__rtm_rw_mutex_H #define __TBB_detail__rtm_rw_mutex_H #include "_assert.h" #include "_utils.h" #include "../spin_rw_mutex.h" #include namespace tbb { namespace detail { namespace r1 { struct rtm_rw_mutex_impl; } namespace d1 { constexpr std::size_t speculation_granularity = 64; #if _MSC_VER && !defined(__INTEL_COMPILER) // Suppress warning: structure was padded due to alignment specifier #pragma warning (push) #pragma warning (disable: 4324) #endif //! Fast, unfair, spinning speculation-enabled reader-writer lock with backoff and writer-preference /** @ingroup synchronization */ class alignas(max_nfs_size) rtm_rw_mutex : private spin_rw_mutex { friend struct r1::rtm_rw_mutex_impl; private: enum class rtm_type { rtm_not_in_mutex, rtm_transacting_reader, rtm_transacting_writer, rtm_real_reader, rtm_real_writer }; public: //! Constructors rtm_rw_mutex() noexcept : write_flag(false) { create_itt_sync(this, "tbb::speculative_spin_rw_mutex", ""); } //! Destructor ~rtm_rw_mutex() = default; //! Represents acquisition of a mutex. class scoped_lock { friend struct r1::rtm_rw_mutex_impl; public: //! Construct lock that has not acquired a mutex. /** Equivalent to zero-initialization of *this. */ constexpr scoped_lock() : m_mutex(nullptr), m_transaction_state(rtm_type::rtm_not_in_mutex) {} //! Acquire lock on given mutex. scoped_lock(rtm_rw_mutex& m, bool write = true) : m_mutex(nullptr), m_transaction_state(rtm_type::rtm_not_in_mutex) { acquire(m, write); } //! Release lock (if lock is held). ~scoped_lock() { if(m_transaction_state != rtm_type::rtm_not_in_mutex) { release(); } } //! No Copy scoped_lock(const scoped_lock&) = delete; scoped_lock& operator=(const scoped_lock&) = delete; //! Acquire lock on given mutex. inline void acquire(rtm_rw_mutex& m, bool write = true); //! Try acquire lock on given mutex. inline bool try_acquire(rtm_rw_mutex& m, bool write = true); //! Release lock inline void release(); //! Upgrade reader to become a writer. 
/** Returns whether the upgrade happened without releasing and re-acquiring the lock */ inline bool upgrade_to_writer(); //! Downgrade writer to become a reader. inline bool downgrade_to_reader(); inline bool is_writer() const; private: rtm_rw_mutex* m_mutex; rtm_type m_transaction_state; }; //! Mutex traits static constexpr bool is_rw_mutex = true; static constexpr bool is_recursive_mutex = false; static constexpr bool is_fair_mutex = false; private: alignas(speculation_granularity) std::atomic write_flag; }; #if _MSC_VER && !defined(__INTEL_COMPILER) #pragma warning (pop) // 4324 warning #endif } // namespace d1 namespace r1 { //! Internal acquire write lock. // only_speculate == true if we're doing a try_lock, else false. TBB_EXPORT void __TBB_EXPORTED_FUNC acquire_writer(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&, bool only_speculate = false); //! Internal acquire read lock. // only_speculate == true if we're doing a try_lock, else false. TBB_EXPORT void __TBB_EXPORTED_FUNC acquire_reader(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&, bool only_speculate = false); //! Internal upgrade reader to become a writer. TBB_EXPORT bool __TBB_EXPORTED_FUNC upgrade(d1::rtm_rw_mutex::scoped_lock&); //! Internal downgrade writer to become a reader. TBB_EXPORT bool __TBB_EXPORTED_FUNC downgrade(d1::rtm_rw_mutex::scoped_lock&); //! Internal try_acquire write lock. TBB_EXPORT bool __TBB_EXPORTED_FUNC try_acquire_writer(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&); //! Internal try_acquire read lock. TBB_EXPORT bool __TBB_EXPORTED_FUNC try_acquire_reader(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&); //! Internal release lock. TBB_EXPORT void __TBB_EXPORTED_FUNC release(d1::rtm_rw_mutex::scoped_lock&); } namespace d1 { //! Acquire lock on given mutex. 
void rtm_rw_mutex::scoped_lock::acquire(rtm_rw_mutex& m, bool write) { __TBB_ASSERT(!m_mutex, "lock is already acquired"); if (write) { r1::acquire_writer(m, *this); } else { r1::acquire_reader(m, *this); } } //! Try acquire lock on given mutex. bool rtm_rw_mutex::scoped_lock::try_acquire(rtm_rw_mutex& m, bool write) { __TBB_ASSERT(!m_mutex, "lock is already acquired"); if (write) { return r1::try_acquire_writer(m, *this); } else { return r1::try_acquire_reader(m, *this); } } //! Release lock void rtm_rw_mutex::scoped_lock::release() { __TBB_ASSERT(m_mutex, "lock is not acquired"); __TBB_ASSERT(m_transaction_state != rtm_type::rtm_not_in_mutex, "lock is not acquired"); return r1::release(*this); } //! Upgrade reader to become a writer. /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ bool rtm_rw_mutex::scoped_lock::upgrade_to_writer() { __TBB_ASSERT(m_mutex, "lock is not acquired"); if (m_transaction_state == rtm_type::rtm_transacting_writer || m_transaction_state == rtm_type::rtm_real_writer) { return true; // Already a writer } return r1::upgrade(*this); } //! Downgrade writer to become a reader. 
bool rtm_rw_mutex::scoped_lock::downgrade_to_reader() { __TBB_ASSERT(m_mutex, "lock is not acquired"); if (m_transaction_state == rtm_type::rtm_transacting_reader || m_transaction_state == rtm_type::rtm_real_reader) { return true; // Already a reader } return r1::downgrade(*this); } bool rtm_rw_mutex::scoped_lock::is_writer() const { __TBB_ASSERT(m_mutex, "lock is not acquired"); return m_transaction_state == rtm_type::rtm_transacting_writer || m_transaction_state == rtm_type::rtm_real_writer; } #if TBB_USE_PROFILING_TOOLS inline void set_name(rtm_rw_mutex& obj, const char* name) { itt_set_sync_name(&obj, name); } #if (_WIN32||_WIN64) inline void set_name(rtm_rw_mutex& obj, const wchar_t* name) { itt_set_sync_name(&obj, name); } #endif // WIN #else inline void set_name(rtm_rw_mutex&, const char*) {} #if (_WIN32||_WIN64) inline void set_name(rtm_rw_mutex&, const wchar_t*) {} #endif // WIN #endif } // namespace d1 } // namespace detail } // namespace tbb #endif // __TBB_detail__rtm_rw_mutex_H level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/_scoped_lock.h000066400000000000000000000115451514453371700326450ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_detail_scoped_lock_H #define __TBB_detail_scoped_lock_H namespace tbb { namespace detail { namespace d1 { // unique_scoped_lock supposes that Mutex operations never throw template class unique_scoped_lock { //! 
Points to currently held Mutex, or NULL if no lock is held. Mutex* m_mutex{}; public: //! Construct without acquiring a Mutex. constexpr unique_scoped_lock() noexcept : m_mutex(nullptr) {} //! Construct and acquire lock on a Mutex. unique_scoped_lock(Mutex& m) { acquire(m); } //! No Copy unique_scoped_lock(const unique_scoped_lock&) = delete; unique_scoped_lock& operator=(const unique_scoped_lock&) = delete; //! Acquire lock. void acquire(Mutex& m) { __TBB_ASSERT(m_mutex == nullptr, "The mutex is already acquired"); m_mutex = &m; m.lock(); } //! Try acquiring lock (non-blocking) /** Return true if lock acquired; false otherwise. */ bool try_acquire(Mutex& m) { __TBB_ASSERT(m_mutex == nullptr, "The mutex is already acquired"); bool succeed = m.try_lock(); if (succeed) { m_mutex = &m; } return succeed; } //! Release lock void release() { __TBB_ASSERT(m_mutex, "release on Mutex::unique_scoped_lock that is not holding a lock"); m_mutex->unlock(); m_mutex = nullptr; } //! Destroy lock. If holding a lock, releases the lock first. ~unique_scoped_lock() { if (m_mutex) { release(); } } }; // rw_scoped_lock supposes that Mutex operations never throw template class rw_scoped_lock { public: //! Construct lock that has not acquired a mutex. /** Equivalent to zero-initialization of *this. */ constexpr rw_scoped_lock() noexcept {} //! Acquire lock on given mutex. rw_scoped_lock(Mutex& m, bool write = true) { acquire(m, write); } //! Release lock (if lock is held). ~rw_scoped_lock() { if (m_mutex) { release(); } } //! No Copy rw_scoped_lock(const rw_scoped_lock&) = delete; rw_scoped_lock& operator=(const rw_scoped_lock&) = delete; //! Acquire lock on given mutex. void acquire(Mutex& m, bool write = true) { __TBB_ASSERT(m_mutex == nullptr, "The mutex is already acquired"); m_is_writer = write; m_mutex = &m; if (write) { m_mutex->lock(); } else { m_mutex->lock_shared(); } } //! Try acquire lock on given mutex. bool try_acquire(Mutex& m, bool write = true) { bool succeed = write ? 
m.try_lock() : m.try_lock_shared(); if (succeed) { m_mutex = &m; m_is_writer = write; } return succeed; } //! Release lock. void release() { __TBB_ASSERT(m_mutex != nullptr, "The mutex is not acquired"); Mutex* m = m_mutex; m_mutex = nullptr; if (m_is_writer) { m->unlock(); } else { m->unlock_shared(); } } //! Upgrade reader to become a writer. /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ bool upgrade_to_writer() { __TBB_ASSERT(m_mutex != nullptr, "The mutex is not acquired"); if (m_is_writer) { return true; // Already a writer } m_is_writer = true; return m_mutex->upgrade(); } //! Downgrade writer to become a reader. bool downgrade_to_reader() { __TBB_ASSERT(m_mutex != nullptr, "The mutex is not acquired"); if (m_is_writer) { m_mutex->downgrade(); m_is_writer = false; } return true; } bool is_writer() const { __TBB_ASSERT(m_mutex != nullptr, "The mutex is not acquired"); return m_is_writer; } protected: //! The pointer to the current mutex that is held, or nullptr if no mutex is held. Mutex* m_mutex {nullptr}; //! If mutex != nullptr, then is_writer is true if holding a writer lock, false if holding a reader lock. /** Not defined if not holding a lock. */ bool m_is_writer {false}; }; } // namespace d1 } // namespace detail } // namespace tbb #endif // __TBB_detail_scoped_lock_H _segment_table.h000066400000000000000000000600711514453371700331100ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/* Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_detail__segment_table_H #define __TBB_detail__segment_table_H #include "_config.h" #include "_allocator_traits.h" #include "_template_helpers.h" #include "_utils.h" #include "_assert.h" #include "_exception.h" #include #include #include #include #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) #pragma warning(push) #pragma warning(disable: 4127) // warning C4127: conditional expression is constant #endif namespace tbb { namespace detail { namespace d1 { template class segment_table { public: using value_type = T; using segment_type = T*; using atomic_segment = std::atomic; using segment_table_type = atomic_segment*; using size_type = std::size_t; using segment_index_type = std::size_t; using allocator_type = Allocator; using allocator_traits_type = tbb::detail::allocator_traits; using segment_table_allocator_type = typename allocator_traits_type::template rebind_alloc; protected: using segment_table_allocator_traits = tbb::detail::allocator_traits; using derived_type = DerivedType; static constexpr size_type pointers_per_embedded_table = PointersPerEmbeddedTable; static constexpr size_type pointers_per_long_table = sizeof(size_type) * 8; public: segment_table( const allocator_type& alloc = allocator_type() ) : my_segment_table_allocator(alloc), my_segment_table(my_embedded_table) , my_first_block{}, my_size{}, my_segment_table_allocation_failed{} { zero_table(my_embedded_table, pointers_per_embedded_table); } segment_table( const segment_table& other ) : my_segment_table_allocator(segment_table_allocator_traits:: select_on_container_copy_construction(other.my_segment_table_allocator)) , my_segment_table(my_embedded_table), my_first_block{}, my_size{}, my_segment_table_allocation_failed{} { zero_table(my_embedded_table, pointers_per_embedded_table); try_call( [&] { internal_transfer(other, copy_segment_body_type{*this}); } ).on_exception( [&] { 
clear(); }); } segment_table( const segment_table& other, const allocator_type& alloc ) : my_segment_table_allocator(alloc), my_segment_table(my_embedded_table) , my_first_block{}, my_size{}, my_segment_table_allocation_failed{} { zero_table(my_embedded_table, pointers_per_embedded_table); try_call( [&] { internal_transfer(other, copy_segment_body_type{*this}); } ).on_exception( [&] { clear(); }); } segment_table( segment_table&& other ) : my_segment_table_allocator(std::move(other.my_segment_table_allocator)), my_segment_table(my_embedded_table) , my_first_block{}, my_size{}, my_segment_table_allocation_failed{} { zero_table(my_embedded_table, pointers_per_embedded_table); internal_move(std::move(other)); } segment_table( segment_table&& other, const allocator_type& alloc ) : my_segment_table_allocator(alloc), my_segment_table(my_embedded_table), my_first_block{} , my_size{}, my_segment_table_allocation_failed{} { zero_table(my_embedded_table, pointers_per_embedded_table); using is_equal_type = typename segment_table_allocator_traits::is_always_equal; internal_move_construct_with_allocator(std::move(other), alloc, is_equal_type()); } ~segment_table() { clear(); } segment_table& operator=( const segment_table& other ) { if (this != &other) { copy_assign_allocators(my_segment_table_allocator, other.my_segment_table_allocator); internal_transfer(other, copy_segment_body_type{*this}); } return *this; } segment_table& operator=( segment_table&& other ) noexcept(derived_type::is_noexcept_assignment) { using pocma_type = typename segment_table_allocator_traits::propagate_on_container_move_assignment; using is_equal_type = typename segment_table_allocator_traits::is_always_equal; if (this != &other) { move_assign_allocators(my_segment_table_allocator, other.my_segment_table_allocator); internal_move_assign(std::move(other), tbb::detail::disjunction()); } return *this; } void swap( segment_table& other ) noexcept(derived_type::is_noexcept_swap) { using is_equal_type = 
typename segment_table_allocator_traits::is_always_equal; using pocs_type = typename segment_table_allocator_traits::propagate_on_container_swap; if (this != &other) { swap_allocators(my_segment_table_allocator, other.my_segment_table_allocator); internal_swap(other, tbb::detail::disjunction()); } } segment_type get_segment( segment_index_type index ) const { return get_table()[index] + segment_base(index); } value_type& operator[]( size_type index ) { return internal_subscript(index); } const value_type& operator[]( size_type index ) const { return const_cast(this)->internal_subscript(index); } const segment_table_allocator_type& get_allocator() const { return my_segment_table_allocator; } segment_table_allocator_type& get_allocator() { return my_segment_table_allocator; } void enable_segment( segment_type& segment, segment_table_type table, segment_index_type seg_index, size_type index ) { // Allocate new segment segment_type new_segment = self()->create_segment(table, seg_index, index); if (new_segment != nullptr) { // Store (new_segment - segment_base) into the segment table to allow access to the table by index via // my_segment_table[segment_index_of(index)][index] segment_type disabled_segment = nullptr; if (!table[seg_index].compare_exchange_strong(disabled_segment, new_segment - segment_base(seg_index))) { // compare_exchange failed => some other thread has already enabled this segment // Deallocate the memory self()->deallocate_segment(new_segment, seg_index); } } segment = table[seg_index].load(std::memory_order_acquire); __TBB_ASSERT(segment != nullptr, "If create_segment returned nullptr, the element should be stored in the table"); } void delete_segment( segment_index_type seg_index ) { segment_type segment_to_delete = self()->nullify_segment(get_table(), seg_index); if (segment_to_delete == segment_allocation_failure_tag) { return; } segment_to_delete += segment_base(seg_index); // Deallocate the segment self()->destroy_segment(segment_to_delete, 
seg_index); } size_type number_of_segments( segment_table_type table ) const { // Check for an active table, if it is embedded table - return the number of embedded segments // Otherwise - return the maximum number of segments return table == my_embedded_table ? pointers_per_embedded_table : pointers_per_long_table; } size_type capacity() const noexcept { segment_table_type table = get_table(); size_type num_segments = number_of_segments(table); for (size_type seg_index = 0; seg_index < num_segments; ++seg_index) { // Check if the pointer is valid (allocated) if (table[seg_index].load(std::memory_order_relaxed) <= segment_allocation_failure_tag) { return segment_base(seg_index); } } return segment_base(num_segments); } size_type find_last_allocated_segment( segment_table_type table ) const noexcept { size_type end = 0; size_type num_segments = number_of_segments(table); for (size_type seg_index = 0; seg_index < num_segments; ++seg_index) { // Check if the pointer is valid (allocated) if (table[seg_index].load(std::memory_order_relaxed) > segment_allocation_failure_tag) { end = seg_index + 1; } } return end; } void reserve( size_type n ) { if (n > allocator_traits_type::max_size(my_segment_table_allocator)) { throw_exception(exception_id::reservation_length_error); } size_type size = my_size.load(std::memory_order_relaxed); segment_index_type start_seg_idx = size == 0 ? 
0 : segment_index_of(size - 1) + 1; for (segment_index_type seg_idx = start_seg_idx; segment_base(seg_idx) < n; ++seg_idx) { size_type first_index = segment_base(seg_idx); internal_subscript(first_index); } } void clear() { clear_segments(); clear_table(); my_size.store(0, std::memory_order_relaxed); my_first_block.store(0, std::memory_order_relaxed); } void clear_segments() { segment_table_type current_segment_table = get_table(); for (size_type i = number_of_segments(current_segment_table); i != 0; --i) { if (current_segment_table[i - 1].load(std::memory_order_relaxed) != nullptr) { // If the segment was enabled - disable and deallocate it delete_segment(i - 1); } } } void clear_table() { segment_table_type current_segment_table = get_table(); if (current_segment_table != my_embedded_table) { // If the active table is not the embedded one - deallocate the active table for (size_type i = 0; i != pointers_per_long_table; ++i) { segment_table_allocator_traits::destroy(my_segment_table_allocator, ¤t_segment_table[i]); } segment_table_allocator_traits::deallocate(my_segment_table_allocator, current_segment_table, pointers_per_long_table); my_segment_table.store(my_embedded_table, std::memory_order_relaxed); zero_table(my_embedded_table, pointers_per_embedded_table); } } void extend_table_if_necessary(segment_table_type& table, size_type start_index, size_type end_index) { // extend_segment_table if an active table is an embedded table // and the requested index is not in the embedded table if (table == my_embedded_table && end_index > embedded_table_size) { if (start_index <= embedded_table_size) { try_call([&] { table = self()->allocate_long_table(my_embedded_table, start_index); // It is possible that the table was extended by the thread that allocated first_block. // In this case it is necessary to re-read the current table. 
if (table) { my_segment_table.store(table, std::memory_order_release); } else { table = my_segment_table.load(std::memory_order_acquire); } }).on_exception([&] { my_segment_table_allocation_failed.store(true, std::memory_order_relaxed); }); } else { atomic_backoff backoff; do { if (my_segment_table_allocation_failed.load(std::memory_order_relaxed)) { throw_exception(exception_id::bad_alloc); } backoff.pause(); table = my_segment_table.load(std::memory_order_acquire); } while (table == my_embedded_table); } } } // Return the segment where index is stored static constexpr segment_index_type segment_index_of( size_type index ) { return size_type(tbb::detail::log2(uintptr_t(index|1))); } // Needed to calculate the offset in segment static constexpr size_type segment_base( size_type index ) { return size_type(1) << index & ~size_type(1); } // Return size of the segment static constexpr size_type segment_size( size_type index ) { return index == 0 ? 2 : size_type(1) << index; } private: derived_type* self() { return static_cast(this); } struct copy_segment_body_type { void operator()( segment_index_type index, segment_type from, segment_type to ) const { my_instance.self()->copy_segment(index, from, to); } segment_table& my_instance; }; struct move_segment_body_type { void operator()( segment_index_type index, segment_type from, segment_type to ) const { my_instance.self()->move_segment(index, from, to); } segment_table& my_instance; }; // Transgers all segments from the other table template void internal_transfer( const segment_table& other, TransferBody transfer_segment ) { static_cast(this)->destroy_elements(); assign_first_block_if_necessary(other.my_first_block.load(std::memory_order_relaxed)); my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); segment_table_type other_table = other.get_table(); size_type end_segment_size = segment_size(other.find_last_allocated_segment(other_table)); // If an exception occurred in other, then 
the size may be greater than the size of the end segment. size_type other_size = end_segment_size < other.my_size.load(std::memory_order_relaxed) ? other.my_size.load(std::memory_order_relaxed) : end_segment_size; other_size = my_segment_table_allocation_failed ? embedded_table_size : other_size; for (segment_index_type i = 0; segment_base(i) < other_size; ++i) { // If the segment in other table is enabled - transfer it if (other_table[i].load(std::memory_order_relaxed) == segment_allocation_failure_tag) { my_size = segment_base(i); break; } else if (other_table[i].load(std::memory_order_relaxed) != nullptr) { internal_subscript(segment_base(i)); transfer_segment(i, other.get_table()[i].load(std::memory_order_relaxed) + segment_base(i), get_table()[i].load(std::memory_order_relaxed) + segment_base(i)); } } } // Moves the other segment table // Only equal allocators are allowed void internal_move( segment_table&& other ) { // NOTE: allocators should be equal clear(); my_first_block.store(other.my_first_block.load(std::memory_order_relaxed), std::memory_order_relaxed); my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); // If an active table in other is embedded - restore all of the embedded segments if (other.get_table() == other.my_embedded_table) { for ( size_type i = 0; i != pointers_per_embedded_table; ++i ) { segment_type other_segment = other.my_embedded_table[i].load(std::memory_order_relaxed); my_embedded_table[i].store(other_segment, std::memory_order_relaxed); other.my_embedded_table[i].store(nullptr, std::memory_order_relaxed); } my_segment_table.store(my_embedded_table, std::memory_order_relaxed); } else { my_segment_table.store(other.my_segment_table, std::memory_order_relaxed); other.my_segment_table.store(other.my_embedded_table, std::memory_order_relaxed); zero_table(other.my_embedded_table, pointers_per_embedded_table); } other.my_size.store(0, std::memory_order_relaxed); } // Move construct the segment table 
with the allocator object // if any instances of allocator_type are always equal void internal_move_construct_with_allocator( segment_table&& other, const allocator_type&, /*is_always_equal = */ std::true_type ) { internal_move(std::move(other)); } // Move construct the segment table with the allocator object // if any instances of allocator_type are always equal void internal_move_construct_with_allocator( segment_table&& other, const allocator_type& alloc, /*is_always_equal = */ std::false_type ) { if (other.my_segment_table_allocator == alloc) { // If allocators are equal - restore pointers internal_move(std::move(other)); } else { // If allocators are not equal - perform per element move with reallocation try_call( [&] { internal_transfer(other, move_segment_body_type{*this}); } ).on_exception( [&] { clear(); }); } } // Move assigns the segment table to other is any instances of allocator_type are always equal // or propagate_on_container_move_assignment is true void internal_move_assign( segment_table&& other, /*is_always_equal || POCMA = */ std::true_type ) { internal_move(std::move(other)); } // Move assigns the segment table to other is any instances of allocator_type are not always equal // and propagate_on_container_move_assignment is false void internal_move_assign( segment_table&& other, /*is_always_equal || POCMA = */ std::false_type ) { if (my_segment_table_allocator == other.my_segment_table_allocator) { // If allocators are equal - restore pointers internal_move(std::move(other)); } else { // If allocators are not equal - perform per element move with reallocation internal_transfer(other, move_segment_body_type{*this}); } } // Swaps two segment tables if any instances of allocator_type are always equal // or propagate_on_container_swap is true void internal_swap( segment_table& other, /*is_always_equal || POCS = */ std::true_type ) { internal_swap_fields(other); } // Swaps two segment tables if any instances of allocator_type are not always equal // 
and propagate_on_container_swap is false // According to the C++ standard, swapping of two containers with unequal allocators // is an undefined behavior scenario void internal_swap( segment_table& other, /*is_always_equal || POCS = */ std::false_type ) { __TBB_ASSERT(my_segment_table_allocator == other.my_segment_table_allocator, "Swapping with unequal allocators is not allowed"); internal_swap_fields(other); } void internal_swap_fields( segment_table& other ) { // If an active table in either *this segment table or other is an embedded one - swaps the embedded tables if (get_table() == my_embedded_table || other.get_table() == other.my_embedded_table) { for (size_type i = 0; i != pointers_per_embedded_table; ++i) { segment_type current_segment = my_embedded_table[i].load(std::memory_order_relaxed); segment_type other_segment = other.my_embedded_table[i].load(std::memory_order_relaxed); my_embedded_table[i].store(other_segment, std::memory_order_relaxed); other.my_embedded_table[i].store(current_segment, std::memory_order_relaxed); } } segment_table_type current_segment_table = get_table(); segment_table_type other_segment_table = other.get_table(); // If an active table is an embedded one - // store an active table in other to the embedded one from other if (current_segment_table == my_embedded_table) { other.my_segment_table.store(other.my_embedded_table, std::memory_order_relaxed); } else { // Otherwise - store it to the active segment table other.my_segment_table.store(current_segment_table, std::memory_order_relaxed); } // If an active table in other segment table is an embedded one - // store an active table in other to the embedded one from *this if (other_segment_table == other.my_embedded_table) { my_segment_table.store(my_embedded_table, std::memory_order_relaxed); } else { // Otherwise - store it to the active segment table in other my_segment_table.store(other_segment_table, std::memory_order_relaxed); } auto first_block = 
other.my_first_block.load(std::memory_order_relaxed); other.my_first_block.store(my_first_block.load(std::memory_order_relaxed), std::memory_order_relaxed); my_first_block.store(first_block, std::memory_order_relaxed); auto size = other.my_size.load(std::memory_order_relaxed); other.my_size.store(my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); my_size.store(size, std::memory_order_relaxed); } protected: // A flag indicates that an exception was throws during segment allocations const segment_type segment_allocation_failure_tag = reinterpret_cast(1); static constexpr size_type embedded_table_size = segment_size(pointers_per_embedded_table); template value_type& internal_subscript( size_type index ) { segment_index_type seg_index = segment_index_of(index); segment_table_type table = my_segment_table.load(std::memory_order_acquire); segment_type segment = nullptr; if (allow_out_of_range_access) { if (derived_type::allow_table_extending) { extend_table_if_necessary(table, index, index + 1); } segment = table[seg_index].load(std::memory_order_acquire); // If the required segment is disabled - enable it if (segment == nullptr) { enable_segment(segment, table, seg_index, index); } // Check if an exception was thrown during segment allocation if (segment == segment_allocation_failure_tag) { throw_exception(exception_id::bad_alloc); } } else { segment = table[seg_index].load(std::memory_order_acquire); } __TBB_ASSERT(segment != nullptr, nullptr); return segment[index]; } void assign_first_block_if_necessary(segment_index_type index) { size_type zero = 0; if (this->my_first_block.load(std::memory_order_relaxed) == zero) { this->my_first_block.compare_exchange_strong(zero, index); } } void zero_table( segment_table_type table, size_type count ) { for (size_type i = 0; i != count; ++i) { table[i].store(nullptr, std::memory_order_relaxed); } } segment_table_type get_table() const { return my_segment_table.load(std::memory_order_acquire); } 
segment_table_allocator_type my_segment_table_allocator; std::atomic my_segment_table; atomic_segment my_embedded_table[pointers_per_embedded_table]; // Number of segments in first block std::atomic my_first_block; // Number of elements in table std::atomic my_size; // Flag to indicate failed extend table std::atomic my_segment_table_allocation_failed; }; // class segment_table } // namespace d1 } // namespace detail } // namespace tbb #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) #pragma warning(pop) // warning 4127 is back #endif #endif // __TBB_detail__segment_table_H _small_object_pool.h000066400000000000000000000067661514453371700340010ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/* Copyright (c) 2020-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #ifndef __TBB__small_object_pool_H #define __TBB__small_object_pool_H #include "_config.h" #include "_assert.h" #include "../profiling.h" #include #include #include namespace tbb { namespace detail { namespace d1 { class small_object_pool { protected: small_object_pool() = default; }; struct execution_data; } namespace r1 { TBB_EXPORT void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& pool, std::size_t number_of_bytes, const d1::execution_data& ed); TBB_EXPORT void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& pool, std::size_t number_of_bytes); TBB_EXPORT void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& pool, void* ptr, std::size_t number_of_bytes, const d1::execution_data& ed); TBB_EXPORT void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& pool, void* ptr, std::size_t number_of_bytes); } namespace d1 { class small_object_allocator { public: template Type* new_object(execution_data& ed, Args&&... args) { void* allocated_object = r1::allocate(m_pool, sizeof(Type), ed); auto constructed_object = new(allocated_object) Type(std::forward(args)...); return constructed_object; } template Type* new_object(Args&&... 
args) { void* allocated_object = r1::allocate(m_pool, sizeof(Type)); auto constructed_object = new(allocated_object) Type(std::forward(args)...); return constructed_object; } template void delete_object(Type* object, const execution_data& ed) { // Copy this since it can be a member of the passed object and // unintentionally destroyed when Type destructor is called below small_object_allocator alloc = *this; object->~Type(); alloc.deallocate(object, ed); } template void delete_object(Type* object) { // Copy this since it can be a member of the passed object and // unintentionally destroyed when Type destructor is called below small_object_allocator alloc = *this; object->~Type(); alloc.deallocate(object); } template void deallocate(Type* ptr, const execution_data& ed) { call_itt_task_notify(destroy, ptr); __TBB_ASSERT(m_pool != nullptr, "Pool must be valid for deallocate call"); r1::deallocate(*m_pool, ptr, sizeof(Type), ed); } template void deallocate(Type* ptr) { call_itt_task_notify(destroy, ptr); __TBB_ASSERT(m_pool != nullptr, "Pool must be valid for deallocate call"); r1::deallocate(*m_pool, ptr, sizeof(Type)); } private: small_object_pool* m_pool{}; }; } // namespace d1 } // namespace detail } // namespace tbb #endif /* __TBB__small_object_pool_H */ _string_resource.h000066400000000000000000000074401514453371700335150ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. */ TBB_STRING_RESOURCE(ALGORITHM, "tbb_algorithm") TBB_STRING_RESOURCE(PARALLEL_FOR, "tbb_parallel_for") TBB_STRING_RESOURCE(PARALLEL_FOR_EACH, "tbb_parallel_for_each") TBB_STRING_RESOURCE(PARALLEL_INVOKE, "tbb_parallel_invoke") TBB_STRING_RESOURCE(PARALLEL_REDUCE, "tbb_parallel_reduce") TBB_STRING_RESOURCE(PARALLEL_SCAN, "tbb_parallel_scan") TBB_STRING_RESOURCE(PARALLEL_SORT, "tbb_parallel_sort") TBB_STRING_RESOURCE(PARALLEL_PIPELINE, "tbb_parallel_pipeline") TBB_STRING_RESOURCE(CUSTOM_CTX, "tbb_custom") TBB_STRING_RESOURCE(FLOW_NULL, "null") TBB_STRING_RESOURCE(FLOW_BROADCAST_NODE, "broadcast_node") TBB_STRING_RESOURCE(FLOW_BUFFER_NODE, "buffer_node") TBB_STRING_RESOURCE(FLOW_CONTINUE_NODE, "continue_node") TBB_STRING_RESOURCE(FLOW_FUNCTION_NODE, "function_node") TBB_STRING_RESOURCE(FLOW_JOIN_NODE_QUEUEING, "join_node (queueing)") TBB_STRING_RESOURCE(FLOW_JOIN_NODE_RESERVING, "join_node (reserving)") TBB_STRING_RESOURCE(FLOW_JOIN_NODE_TAG_MATCHING, "join_node (tag_matching)") TBB_STRING_RESOURCE(FLOW_LIMITER_NODE, "limiter_node") TBB_STRING_RESOURCE(FLOW_MULTIFUNCTION_NODE, "multifunction_node") TBB_STRING_RESOURCE(FLOW_OVERWRITE_NODE, "overwrite_node") TBB_STRING_RESOURCE(FLOW_PRIORITY_QUEUE_NODE, "priority_queue_node") TBB_STRING_RESOURCE(FLOW_QUEUE_NODE, "queue_node") TBB_STRING_RESOURCE(FLOW_SEQUENCER_NODE, "sequencer_node") TBB_STRING_RESOURCE(FLOW_INPUT_NODE, "input_node") TBB_STRING_RESOURCE(FLOW_SPLIT_NODE, "split_node") TBB_STRING_RESOURCE(FLOW_WRITE_ONCE_NODE, "write_once_node") TBB_STRING_RESOURCE(FLOW_INDEXER_NODE, "indexer_node") TBB_STRING_RESOURCE(FLOW_COMPOSITE_NODE, "composite_node") TBB_STRING_RESOURCE(FLOW_ASYNC_NODE, "async_node") TBB_STRING_RESOURCE(FLOW_INPUT_PORT, "input_port") TBB_STRING_RESOURCE(FLOW_INPUT_PORT_0, "input_port_0") TBB_STRING_RESOURCE(FLOW_INPUT_PORT_1, "input_port_1") TBB_STRING_RESOURCE(FLOW_INPUT_PORT_2, "input_port_2") 
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_3, "input_port_3") TBB_STRING_RESOURCE(FLOW_INPUT_PORT_4, "input_port_4") TBB_STRING_RESOURCE(FLOW_INPUT_PORT_5, "input_port_5") TBB_STRING_RESOURCE(FLOW_INPUT_PORT_6, "input_port_6") TBB_STRING_RESOURCE(FLOW_INPUT_PORT_7, "input_port_7") TBB_STRING_RESOURCE(FLOW_INPUT_PORT_8, "input_port_8") TBB_STRING_RESOURCE(FLOW_INPUT_PORT_9, "input_port_9") TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT, "output_port") TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_0, "output_port_0") TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_1, "output_port_1") TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_2, "output_port_2") TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_3, "output_port_3") TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_4, "output_port_4") TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_5, "output_port_5") TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_6, "output_port_6") TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_7, "output_port_7") TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_8, "output_port_8") TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_9, "output_port_9") TBB_STRING_RESOURCE(FLOW_OBJECT_NAME, "object_name") TBB_STRING_RESOURCE(FLOW_BODY, "body") TBB_STRING_RESOURCE(FLOW_GRAPH, "graph") TBB_STRING_RESOURCE(FLOW_NODE, "node") TBB_STRING_RESOURCE(FLOW_TASKS, "tbb_flow_graph") TBB_STRING_RESOURCE(USER_EVENT, "user_event") #if __TBB_FLOW_TRACE_CODEPTR TBB_STRING_RESOURCE(CODE_ADDRESS, "code_address") #endif level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/_task.h000066400000000000000000000157141514453371700313240ustar00rootroot00000000000000/* Copyright (c) 2020-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB__task_H #define __TBB__task_H #include "_config.h" #include "_assert.h" #include "_template_helpers.h" #include "_small_object_pool.h" #include "../profiling.h" #include #include #include #include #include #include namespace tbb { namespace detail { namespace d1 { using slot_id = unsigned short; constexpr slot_id no_slot = slot_id(~0); constexpr slot_id any_slot = slot_id(~1); class task; class wait_context; class task_group_context; struct execution_data; } namespace r1 { //! Task spawn/wait entry points TBB_EXPORT void __TBB_EXPORTED_FUNC spawn(d1::task& t, d1::task_group_context& ctx); TBB_EXPORT void __TBB_EXPORTED_FUNC spawn(d1::task& t, d1::task_group_context& ctx, d1::slot_id id); TBB_EXPORT void __TBB_EXPORTED_FUNC execute_and_wait(d1::task& t, d1::task_group_context& t_ctx, d1::wait_context&, d1::task_group_context& w_ctx); TBB_EXPORT void __TBB_EXPORTED_FUNC wait(d1::wait_context&, d1::task_group_context& ctx); TBB_EXPORT d1::slot_id __TBB_EXPORTED_FUNC execution_slot(const d1::execution_data*); TBB_EXPORT d1::task_group_context* __TBB_EXPORTED_FUNC current_context(); // Do not place under __TBB_RESUMABLE_TASKS. It is a stub for unsupported platforms. struct suspend_point_type; using suspend_callback_type = void(*)(void*, suspend_point_type*); //! 
The resumable tasks entry points TBB_EXPORT void __TBB_EXPORTED_FUNC suspend(suspend_callback_type suspend_callback, void* user_callback); TBB_EXPORT void __TBB_EXPORTED_FUNC resume(suspend_point_type* tag); TBB_EXPORT suspend_point_type* __TBB_EXPORTED_FUNC current_suspend_point(); TBB_EXPORT void __TBB_EXPORTED_FUNC notify_waiters(std::uintptr_t wait_ctx_addr); class thread_data; class task_dispatcher; class external_waiter; struct task_accessor; struct task_arena_impl; } // namespace r1 namespace d1 { class task_arena; using suspend_point = r1::suspend_point_type*; #if __TBB_RESUMABLE_TASKS template static void suspend_callback(void* user_callback, suspend_point sp) { // Copy user function to a new stack after the context switch to avoid a race when the previous // suspend point is resumed while the user_callback is being called. F user_callback_copy = *static_cast(user_callback); user_callback_copy(sp); } template void suspend(F f) { r1::suspend(&suspend_callback, &f); } inline void resume(suspend_point tag) { r1::resume(tag); } #endif /* __TBB_RESUMABLE_TASKS */ // TODO align wait_context on cache lane class wait_context { static constexpr std::uint64_t overflow_mask = ~((1LLU << 32) - 1); std::uint64_t m_version_and_traits{1}; std::atomic m_ref_count{}; void add_reference(std::int64_t delta) { call_itt_task_notify(releasing, this); std::uint64_t r = m_ref_count.fetch_add(delta) + delta; __TBB_ASSERT_EX((r & overflow_mask) == 0, "Overflow is detected"); if (!r) { // Some external waiters or coroutine waiters sleep in wait list // Should to notify them that work is done std::uintptr_t wait_ctx_addr = std::uintptr_t(this); r1::notify_waiters(wait_ctx_addr); } } bool continue_execution() const { std::uint64_t r = m_ref_count.load(std::memory_order_acquire); __TBB_ASSERT_EX((r & overflow_mask) == 0, "Overflow is detected"); return r > 0; } friend class r1::thread_data; friend class r1::task_dispatcher; friend class r1::external_waiter; friend class task_group; 
friend class task_group_base; friend struct r1::task_arena_impl; friend struct r1::suspend_point_type; public: // Despite the internal reference count is uin64_t we limit the user interface with uint32_t // to preserve a part of the internal reference count for special needs. wait_context(std::uint32_t ref_count) : m_ref_count{ref_count} { suppress_unused_warning(m_version_and_traits); } wait_context(const wait_context&) = delete; ~wait_context() { __TBB_ASSERT(!continue_execution(), NULL); } void reserve(std::uint32_t delta = 1) { add_reference(delta); } void release(std::uint32_t delta = 1) { add_reference(-std::int64_t(delta)); } }; struct execution_data { task_group_context* context{}; slot_id original_slot{}; slot_id affinity_slot{}; }; inline task_group_context* context(const execution_data& ed) { return ed.context; } inline slot_id original_slot(const execution_data& ed) { return ed.original_slot; } inline slot_id affinity_slot(const execution_data& ed) { return ed.affinity_slot; } inline slot_id execution_slot(const execution_data& ed) { return r1::execution_slot(&ed); } inline bool is_same_affinity(const execution_data& ed) { return affinity_slot(ed) == no_slot || affinity_slot(ed) == execution_slot(ed); } inline bool is_stolen(const execution_data& ed) { return original_slot(ed) != execution_slot(ed); } inline void spawn(task& t, task_group_context& ctx) { call_itt_task_notify(releasing, &t); r1::spawn(t, ctx); } inline void spawn(task& t, task_group_context& ctx, slot_id id) { call_itt_task_notify(releasing, &t); r1::spawn(t, ctx, id); } inline void execute_and_wait(task& t, task_group_context& t_ctx, wait_context& wait_ctx, task_group_context& w_ctx) { r1::execute_and_wait(t, t_ctx, wait_ctx, w_ctx); call_itt_task_notify(acquired, &wait_ctx); call_itt_task_notify(destroy, &wait_ctx); } inline void wait(wait_context& wait_ctx, task_group_context& ctx) { r1::wait(wait_ctx, ctx); call_itt_task_notify(acquired, &wait_ctx); call_itt_task_notify(destroy, 
&wait_ctx); } using r1::current_context; class task_traits { std::uint64_t m_version_and_traits{}; friend struct r1::task_accessor; }; //! Alignment for a task object static constexpr std::size_t task_alignment = 64; //! Base class for user-defined tasks. /** @ingroup task_scheduling */ class alignas(task_alignment) task : public task_traits { protected: virtual ~task() = default; public: virtual task* execute(execution_data&) = 0; virtual task* cancel(execution_data&) = 0; private: std::uint64_t m_reserved[6]{}; friend struct r1::task_accessor; }; static_assert(sizeof(task) == task_alignment, "task size is broken"); } // namespace d1 } // namespace detail } // namespace tbb #endif /* __TBB__task_H */ level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/_task_handle.h000066400000000000000000000070341514453371700326330ustar00rootroot00000000000000/* Copyright (c) 2020-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #ifndef __TBB_task_handle_H #define __TBB_task_handle_H #include "_config.h" #include "_task.h" #include "_small_object_pool.h" #include "_utils.h" #include namespace tbb { namespace detail { namespace d1 { class task_group_context; class wait_context; struct execution_data; } namespace d2 { class task_handle; class task_handle_task : public d1::task { std::uint64_t m_version_and_traits{}; d1::wait_context& m_wait_ctx; d1::task_group_context& m_ctx; d1::small_object_allocator m_allocator; public: void finalize(const d1::execution_data* ed = nullptr) { if (ed) { m_allocator.delete_object(this, *ed); } else { m_allocator.delete_object(this); } } task_handle_task(d1::wait_context& wo, d1::task_group_context& ctx, d1::small_object_allocator& alloc) : m_wait_ctx(wo) , m_ctx(ctx) , m_allocator(alloc) { suppress_unused_warning(m_version_and_traits); } ~task_handle_task() override { m_wait_ctx.release(); } d1::task_group_context& ctx() const { return m_ctx; } }; class task_handle { struct task_handle_task_finalizer_t{ void operator()(task_handle_task* p){ p->finalize(); } }; using handle_impl_t = std::unique_ptr; handle_impl_t m_handle = {nullptr}; public: task_handle() = default; task_handle(task_handle&&) = default; task_handle& operator=(task_handle&&) = default; explicit operator bool() const noexcept { return static_cast(m_handle); } friend bool operator==(task_handle const& th, std::nullptr_t) noexcept; friend bool operator==(std::nullptr_t, task_handle const& th) noexcept; friend bool operator!=(task_handle const& th, std::nullptr_t) noexcept; friend bool operator!=(std::nullptr_t, task_handle const& th) noexcept; private: friend struct task_handle_accessor; task_handle(task_handle_task* t) : m_handle {t}{}; d1::task* release() { return m_handle.release(); } }; struct task_handle_accessor { static task_handle construct(task_handle_task* t) { return {t}; } static d1::task* release(task_handle& th) { return th.release(); } static d1::task_group_context& 
ctx_of(task_handle& th) { __TBB_ASSERT(th.m_handle, "ctx_of does not expect empty task_handle."); return th.m_handle->ctx(); } }; inline bool operator==(task_handle const& th, std::nullptr_t) noexcept { return th.m_handle == nullptr; } inline bool operator==(std::nullptr_t, task_handle const& th) noexcept { return th.m_handle == nullptr; } inline bool operator!=(task_handle const& th, std::nullptr_t) noexcept { return th.m_handle != nullptr; } inline bool operator!=(std::nullptr_t, task_handle const& th) noexcept { return th.m_handle != nullptr; } } // namespace d2 } // namespace detail } // namespace tbb #endif /* __TBB_task_handle_H */ _template_helpers.h000066400000000000000000000323151514453371700336340ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_detail__template_helpers_H #define __TBB_detail__template_helpers_H #include "_utils.h" #include "_config.h" #include #include #include #include #include #include namespace tbb { namespace detail { inline namespace d0 { // An internal implementation of void_t, which can be used in SFINAE contexts template struct void_impl { using type = void; }; // struct void_impl template using void_t = typename void_impl::type; // Generic SFINAE helper for expression checks, based on the idea demonstrated in ISO C++ paper n4502 template class... 
Checks> struct supports_impl { using type = std::false_type; }; template class... Checks> struct supports_impl...>, Checks...> { using type = std::true_type; }; template class... Checks> using supports = typename supports_impl::type; //! A template to select either 32-bit or 64-bit constant as compile time, depending on machine word size. template struct select_size_t_constant { // Explicit cast is needed to avoid compiler warnings about possible truncation. // The value of the right size, which is selected by ?:, is anyway not truncated or promoted. static const std::size_t value = (std::size_t)((sizeof(std::size_t)==sizeof(u)) ? u : ull); }; // TODO: do we really need it? //! Cast between unrelated pointer types. /** This method should be used sparingly as a last resort for dealing with situations that inherently break strict ISO C++ aliasing rules. */ // T is a pointer type because it will be explicitly provided by the programmer as a template argument; // U is a referent type to enable the compiler to check that "ptr" is a pointer, deducing U in the process. template inline T punned_cast( U* ptr ) { std::uintptr_t x = reinterpret_cast(ptr); return reinterpret_cast(x); } template struct padded_base : T { char pad[S - R]; }; template struct padded_base : T {}; //! Pads type T to fill out to a multiple of cache line size. template struct padded : padded_base {}; #if __TBB_CPP14_INTEGER_SEQUENCE_PRESENT using std::index_sequence; using std::make_index_sequence; #else template class index_sequence {}; template struct make_index_sequence_impl : make_index_sequence_impl < N - 1, N - 1, S... 
> {}; template struct make_index_sequence_impl <0, S...> { using type = index_sequence; }; template using make_index_sequence = typename make_index_sequence_impl::type; #endif /* __TBB_CPP14_INTEGER_SEQUENCE_PRESENT */ #if __TBB_CPP17_LOGICAL_OPERATIONS_PRESENT using std::conjunction; using std::disjunction; #else // __TBB_CPP17_LOGICAL_OPERATIONS_PRESENT template struct conjunction : std::true_type {}; template struct conjunction : std::conditional, First>::type {}; template struct conjunction : T {}; template struct disjunction : std::false_type {}; template struct disjunction : std::conditional>::type {}; template struct disjunction : T {}; #endif // __TBB_CPP17_LOGICAL_OPERATIONS_PRESENT template using iterator_value_t = typename std::iterator_traits::value_type; template using iterator_key_t = typename std::remove_const::first_type>::type; template using iterator_mapped_t = typename iterator_value_t::second_type; template using iterator_alloc_pair_t = std::pair>::type, iterator_mapped_t>; template using alloc_value_type = typename A::value_type; template using alloc_ptr_t = typename std::allocator_traits::pointer; template using has_allocate = decltype(std::declval&>() = std::declval().allocate(0)); template using has_deallocate = decltype(std::declval().deallocate(std::declval>(), 0)); // alloc_value_type should be checked first, because it can be used in other checks template using is_allocator = supports; #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT template inline constexpr bool is_allocator_v = is_allocator::value; #endif // Template class in which the "type" determines the type of the element number N in pack Args template struct pack_element { using type = void; }; template struct pack_element { using type = typename pack_element::type; }; template struct pack_element<0, T, Args...> { using type = T; }; template using pack_element_t = typename pack_element::type; template class raii_guard { public: static_assert( std::is_nothrow_copy_constructible::value && 
std::is_nothrow_move_constructible::value, "Throwing an exception during the Func copy or move construction cause an unexpected behavior." ); raii_guard( Func f ) noexcept : my_func(f), is_active(true) {} raii_guard( raii_guard&& g ) noexcept : my_func(std::move(g.my_func)), is_active(g.is_active) { g.is_active = false; } ~raii_guard() { if (is_active) { my_func(); } } void dismiss() { is_active = false; } private: Func my_func; bool is_active; }; // class raii_guard template raii_guard make_raii_guard( Func f ) { return raii_guard(f); } template struct try_call_proxy { try_call_proxy( Body b ) : body(b) {} template void on_exception( OnExceptionBody on_exception_body ) { auto guard = make_raii_guard(on_exception_body); body(); guard.dismiss(); } template void on_completion(OnCompletionBody on_completion_body) { auto guard = make_raii_guard(on_completion_body); body(); } Body body; }; // struct try_call_proxy // Template helper function for API // try_call(lambda1).on_exception(lambda2) // Executes lambda1 and if it throws an exception - executes lambda2 template try_call_proxy try_call( Body b ) { return try_call_proxy(b); } #if __TBB_CPP17_IS_SWAPPABLE_PRESENT using std::is_nothrow_swappable; using std::is_swappable; #else // __TBB_CPP17_IS_SWAPPABLE_PRESENT namespace is_swappable_detail { using std::swap; template using has_swap = decltype(swap(std::declval(), std::declval())); #if _MSC_VER && _MSC_VER <= 1900 && !__INTEL_COMPILER // Workaround for VS2015: it fails to instantiate noexcept(...) inside std::integral_constant. 
template struct noexcept_wrapper { static const bool value = noexcept(swap(std::declval(), std::declval())); }; template struct is_nothrow_swappable_impl : std::integral_constant::value> {}; #else template struct is_nothrow_swappable_impl : std::integral_constant(), std::declval()))> {}; #endif } template struct is_swappable : supports {}; template struct is_nothrow_swappable : conjunction, is_swappable_detail::is_nothrow_swappable_impl> {}; #endif // __TBB_CPP17_IS_SWAPPABLE_PRESENT //! Allows to store a function parameter pack as a variable and later pass it to another function template< typename... Types > struct stored_pack; template<> struct stored_pack<> { using pack_type = stored_pack<>; stored_pack() {} // Friend front-end functions template< typename F, typename Pack > friend void call(F&& f, Pack&& p); template< typename Ret, typename F, typename Pack > friend Ret call_and_return(F&& f, Pack&& p); protected: // Ideally, ref-qualified non-static methods would be used, // but that would greatly reduce the set of compilers where it works. template< typename Ret, typename F, typename... Preceding > static Ret call(F&& f, const pack_type& /*pack*/, Preceding&&... params) { return std::forward(f)(std::forward(params)...); } template< typename Ret, typename F, typename... Preceding > static Ret call(F&& f, pack_type&& /*pack*/, Preceding&&... params) { return std::forward(f)(std::forward(params)...); } }; template< typename T, typename... Types > struct stored_pack : stored_pack { using pack_type = stored_pack; using pack_remainder = stored_pack; // Since lifetime of original values is out of control, copies should be made. // Thus references should be stripped away from the deduced type. typename std::decay::type leftmost_value; // Here rvalue references act in the same way as forwarding references, // as long as class template parameters were deduced via forwarding references. stored_pack(T&& t, Types&&... 
types) : pack_remainder(std::forward(types)...), leftmost_value(std::forward(t)) {} // Friend front-end functions template< typename F, typename Pack > friend void call(F&& f, Pack&& p); template< typename Ret, typename F, typename Pack > friend Ret call_and_return(F&& f, Pack&& p); protected: template< typename Ret, typename F, typename... Preceding > static Ret call(F&& f, pack_type& pack, Preceding&&... params) { return pack_remainder::template call( std::forward(f), static_cast(pack), std::forward(params)... , pack.leftmost_value ); } template< typename Ret, typename F, typename... Preceding > static Ret call(F&& f, pack_type&& pack, Preceding&&... params) { return pack_remainder::template call( std::forward(f), static_cast(pack), std::forward(params)... , std::move(pack.leftmost_value) ); } }; //! Calls the given function with arguments taken from a stored_pack template< typename F, typename Pack > void call(F&& f, Pack&& p) { std::decay::type::template call(std::forward(f), std::forward(p)); } template< typename Ret, typename F, typename Pack > Ret call_and_return(F&& f, Pack&& p) { return std::decay::type::template call(std::forward(f), std::forward(p)); } template< typename... Types > stored_pack save_pack(Types&&... types) { return stored_pack(std::forward(types)...); } // A structure with the value which is equal to Trait::value // but can be used in the immediate context due to parameter T template struct dependent_bool : std::integral_constant {}; template struct body_arg_detector; template struct body_arg_detector { using arg_type = Arg; }; template struct body_arg_detector { using arg_type = Arg; }; template struct argument_detector; template struct argument_detector { using type = typename body_arg_detector::arg_type; }; template struct argument_detector { using type = Arg; }; // Detects the argument type of callable, works for callable with one argument. 
template using argument_type_of = typename argument_detector::type>::type; template struct type_identity { using type = T; }; template using type_identity_t = typename type_identity::type; } // inline namespace d0 } // namespace detail } // namespace tbb #endif // __TBB_detail__template_helpers_H level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/_utils.h000066400000000000000000000320061514453371700315130ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_detail__utils_H #define __TBB_detail__utils_H #include #include #include #include "_config.h" #include "_assert.h" #include "_machine.h" namespace tbb { namespace detail { inline namespace d0 { //! Utility template function to prevent "unused" warnings by various compilers. template void suppress_unused_warning(T&&...) {} //! Compile-time constant that is upper bound on cache line/sector size. /** It should be used only in situations where having a compile-time upper bound is more useful than a run-time exact answer. @ingroup memory_allocation */ constexpr size_t max_nfs_size = 128; constexpr std::size_t max_nfs_size_exp = 7; static_assert(1 << max_nfs_size_exp == max_nfs_size, "max_nfs_size_exp must be a log2(max_nfs_size)"); //! Class that implements exponential backoff. class atomic_backoff { //! Time delay, in units of "pause" instructions. 
/** Should be equal to approximately the number of "pause" instructions that take the same time as an context switch. Must be a power of two.*/ static constexpr std::int32_t LOOPS_BEFORE_YIELD = 16; std::int32_t count; public: // In many cases, an object of this type is initialized eagerly on hot path, // as in for(atomic_backoff b; ; b.pause()) { /*loop body*/ } // For this reason, the construction cost must be very small! atomic_backoff() : count(1) {} // This constructor pauses immediately; do not use on hot paths! atomic_backoff(bool) : count(1) { pause(); } //! No Copy atomic_backoff(const atomic_backoff&) = delete; atomic_backoff& operator=(const atomic_backoff&) = delete; //! Pause for a while. void pause() { if (count <= LOOPS_BEFORE_YIELD) { machine_pause(count); // Pause twice as long the next time. count *= 2; } else { // Pause is so long that we might as well yield CPU to scheduler. yield(); } } //! Pause for a few times and return false if saturated. bool bounded_pause() { machine_pause(count); if (count < LOOPS_BEFORE_YIELD) { // Pause twice as long the next time. count *= 2; return true; } else { return false; } } void reset() { count = 1; } }; //! Spin WHILE the condition is true. /** T and U should be comparable types. */ template T spin_wait_while(const std::atomic& location, C comp, std::memory_order order) { atomic_backoff backoff; T snapshot = location.load(order); while (comp(snapshot)) { backoff.pause(); snapshot = location.load(order); } return snapshot; } //! Spin WHILE the value of the variable is equal to a given value /** T and U should be comparable types. */ template T spin_wait_while_eq(const std::atomic& location, const U value, std::memory_order order = std::memory_order_acquire) { return spin_wait_while(location, [&value](T t) { return t == value; }, order); } //! Spin UNTIL the value of the variable is equal to a given value /** T and U should be comparable types. 
*/ template T spin_wait_until_eq(const std::atomic& location, const U value, std::memory_order order = std::memory_order_acquire) { return spin_wait_while(location, [&value](T t) { return t != value; }, order); } //! Spin UNTIL the condition returns true or spinning time is up. /** Returns what the passed functor returned last time it was invoked. */ template bool timed_spin_wait_until(Condition condition) { // 32 pauses + 32 yields are meausered as balanced spin time before sleep. bool finish = condition(); for (int i = 1; !finish && i < 32; finish = condition(), i *= 2) { machine_pause(i); } for (int i = 32; !finish && i < 64; finish = condition(), ++i) { yield(); } return finish; } template std::uintptr_t log2(T in) { __TBB_ASSERT(in > 0, "The logarithm of a non-positive value is undefined."); return machine_log2(in); } template T reverse_bits(T src) { return machine_reverse_bits(src); } template T reverse_n_bits(T src, std::size_t n) { __TBB_ASSERT(n != 0, "Reverse for 0 bits is undefined behavior."); return reverse_bits(src) >> (number_of_bits() - n); } // A function to check if passed integer is a power of two template constexpr bool is_power_of_two( IntegerType arg ) { static_assert(std::is_integral::value, "An argument for is_power_of_two should be integral type"); return arg && (0 == (arg & (arg - 1))); } // A function to determine if passed integer is a power of two // at least as big as another power of two, i.e. for strictly positive i and j, // with j being a power of two, determines whether i==j< constexpr bool is_power_of_two_at_least(ArgIntegerType arg, DivisorIntegerType divisor) { // Divisor should be a power of two static_assert(std::is_integral::value, "An argument for is_power_of_two_at_least should be integral type"); return 0 == (arg & (arg - divisor)); } // A function to compute arg modulo divisor where divisor is a power of 2. 
template inline ArgIntegerType modulo_power_of_two(ArgIntegerType arg, DivisorIntegerType divisor) { __TBB_ASSERT( is_power_of_two(divisor), "Divisor should be a power of two" ); return arg & (divisor - 1); } //! A function to check if passed in pointer is aligned on a specific border template constexpr bool is_aligned(T* pointer, std::uintptr_t alignment) { return 0 == ((std::uintptr_t)pointer & (alignment - 1)); } #if TBB_USE_ASSERT static void* const poisoned_ptr = reinterpret_cast(-1); //! Set p to invalid pointer value. template inline void poison_pointer( T* &p ) { p = reinterpret_cast(poisoned_ptr); } template inline void poison_pointer(std::atomic& p) { p.store(reinterpret_cast(poisoned_ptr), std::memory_order_relaxed); } /** Expected to be used in assertions only, thus no empty form is defined. **/ template inline bool is_poisoned( T* p ) { return p == reinterpret_cast(poisoned_ptr); } template inline bool is_poisoned(const std::atomic& p) { return is_poisoned(p.load(std::memory_order_relaxed)); } #else template inline void poison_pointer(T&) {/*do nothing*/} #endif /* !TBB_USE_ASSERT */ template bool assert_pointer_valid(T* p, const char* comment = nullptr) { suppress_unused_warning(p, comment); __TBB_ASSERT(p != nullptr, comment); __TBB_ASSERT(!is_poisoned(p), comment); #if !(_MSC_VER && _MSC_VER <= 1900 && !__INTEL_COMPILER) __TBB_ASSERT(is_aligned(p, alignment == 0 ? alignof(T) : alignment), comment); #endif // Returns something to simplify assert_pointers_valid implementation. return true; } template void assert_pointers_valid(Args*... p) { // suppress_unused_warning is used as an evaluation context for the variadic pack. suppress_unused_warning(assert_pointer_valid(p)...); } //! Base class for types that should not be assigned. class no_assign { public: void operator=(const no_assign&) = delete; no_assign(const no_assign&) = default; no_assign() = default; }; //! Base class for types that should not be copied or assigned. 
class no_copy: no_assign { public: no_copy(const no_copy&) = delete; no_copy() = default; }; template void swap_atomics_relaxed(std::atomic& lhs, std::atomic& rhs){ T tmp = lhs.load(std::memory_order_relaxed); lhs.store(rhs.load(std::memory_order_relaxed), std::memory_order_relaxed); rhs.store(tmp, std::memory_order_relaxed); } //! One-time initialization states enum class do_once_state { uninitialized = 0, ///< No execution attempts have been undertaken yet pending, ///< A thread is executing associated do-once routine executed, ///< Do-once routine has been executed initialized = executed ///< Convenience alias }; //! One-time initialization function /** /param initializer Pointer to function without arguments The variant that returns bool is used for cases when initialization can fail and it is OK to continue execution, but the state should be reset so that the initialization attempt was repeated the next time. /param state Shared state associated with initializer that specifies its initialization state. Must be initially set to #uninitialized value (e.g. by means of default static zero initialization). **/ template void atomic_do_once( const F& initializer, std::atomic& state ) { // The loop in the implementation is necessary to avoid race when thread T2 // that arrived in the middle of initialization attempt by another thread T1 // has just made initialization possible. // In such a case T2 has to rely on T1 to initialize, but T1 may already be past // the point where it can recognize the changed conditions. 
do_once_state expected_state; while ( state.load( std::memory_order_acquire ) != do_once_state::executed ) { if( state.load( std::memory_order_relaxed ) == do_once_state::uninitialized ) { expected_state = do_once_state::uninitialized; #if defined(__INTEL_COMPILER) && __INTEL_COMPILER <= 1910 using enum_type = typename std::underlying_type::type; if( ((std::atomic&)state).compare_exchange_strong( (enum_type&)expected_state, (enum_type)do_once_state::pending ) ) { #else if( state.compare_exchange_strong( expected_state, do_once_state::pending ) ) { #endif run_initializer( initializer, state ); break; } } spin_wait_while_eq( state, do_once_state::pending ); } } // Run the initializer which can not fail template void run_initializer(const Functor& f, std::atomic& state ) { f(); state.store(do_once_state::executed, std::memory_order_release); } #if __TBB_CPP20_CONCEPTS_PRESENT template concept boolean_testable_impl = std::convertible_to; template concept boolean_testable = boolean_testable_impl && requires( T&& t ) { { !std::forward(t) } -> boolean_testable_impl; }; #if __TBB_CPP20_COMPARISONS_PRESENT struct synthesized_three_way_comparator { template auto operator()( const T1& lhs, const T2& rhs ) const requires requires { { lhs < rhs } -> boolean_testable; { rhs < lhs } -> boolean_testable; } { if constexpr (std::three_way_comparable_with) { return lhs <=> rhs; } else { if (lhs < rhs) { return std::weak_ordering::less; } if (rhs < lhs) { return std::weak_ordering::greater; } return std::weak_ordering::equivalent; } } }; // struct synthesized_three_way_comparator template using synthesized_three_way_result = decltype(synthesized_three_way_comparator{}(std::declval(), std::declval())); #endif // __TBB_CPP20_COMPARISONS_PRESENT // Check if the type T is implicitly OR explicitly convertible to U template concept relaxed_convertible_to = std::constructible_from; template concept adaptive_same_as = #if __TBB_STRICT_CONSTRAINTS std::same_as; #else std::convertible_to; 
#endif #endif // __TBB_CPP20_CONCEPTS_PRESENT } // namespace d0 namespace d1 { class delegate_base { public: virtual bool operator()() const = 0; virtual ~delegate_base() {} }; template class delegated_function : public delegate_base { public: delegated_function(FuncType& f) : my_func(f) {} bool operator()() const override { return my_func(); } private: FuncType &my_func; }; } // namespace d1 } // namespace detail } // namespace tbb #endif // __TBB_detail__utils_H _waitable_atomic.h000066400000000000000000000067141514453371700334270ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/detail/* Copyright (c) 2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #ifndef __TBB_detail__address_waiters_H #define __TBB_detail__address_waiters_H #include "_utils.h" namespace tbb { namespace detail { namespace r1 { TBB_EXPORT void __TBB_EXPORTED_FUNC wait_on_address(void* address, d1::delegate_base& wakeup_condition, std::uintptr_t context); TBB_EXPORT void __TBB_EXPORTED_FUNC notify_by_address(void* address, std::uintptr_t context); TBB_EXPORT void __TBB_EXPORTED_FUNC notify_by_address_one(void* address); TBB_EXPORT void __TBB_EXPORTED_FUNC notify_by_address_all(void* address); } // namespace r1 namespace d1 { template void adaptive_wait_on_address(void* address, Predicate wakeup_condition, std::uintptr_t context) { if (!timed_spin_wait_until(wakeup_condition)) { d1::delegated_function pred(wakeup_condition); r1::wait_on_address(address, pred, context); } } template class waitable_atomic { public: waitable_atomic() = default; explicit waitable_atomic(T value) : my_atomic(value) {} waitable_atomic(const waitable_atomic&) = delete; waitable_atomic& operator=(const waitable_atomic&) = delete; T load(std::memory_order order) const noexcept { return my_atomic.load(order); } T exchange(T desired) noexcept { return my_atomic.exchange(desired); } void wait(T old, std::uintptr_t context, std::memory_order order) { auto wakeup_condition = [&] { return my_atomic.load(order) != old; }; if (!timed_spin_wait_until(wakeup_condition)) { // We need to use while here, because notify_all() will wake up all threads // But predicate for them might be false d1::delegated_function pred(wakeup_condition); do { r1::wait_on_address(this, pred, context); } while (!wakeup_condition()); } } void wait_until(T expected, std::uintptr_t context, std::memory_order order) { auto wakeup_condition = [&] { return my_atomic.load(order) == expected; }; if (!timed_spin_wait_until(wakeup_condition)) { // We need to use while here, because notify_all() will wake up all threads // But predicate for them might be false d1::delegated_function pred(wakeup_condition); do 
{ r1::wait_on_address(this, pred, context); } while (!wakeup_condition()); } } void notify_relaxed(std::uintptr_t context) { r1::notify_by_address(this, context); } void notify_one_relaxed() { r1::notify_by_address_one(this); } // TODO: consider adding following interfaces: // store(desired, memory_order) // notify_all_relaxed() private: std::atomic my_atomic{}; }; } // namespace d1 } // namespace detail } // namespace tbb #endif // __TBB_detail__address_waiters_H enumerable_thread_specific.h000066400000000000000000001226431514453371700342150ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_enumerable_thread_specific_H #define __TBB_enumerable_thread_specific_H #include "detail/_config.h" #include "detail/_namespace_injection.h" #include "detail/_assert.h" #include "detail/_template_helpers.h" #include "detail/_aligned_space.h" #include "concurrent_vector.h" #include "tbb_allocator.h" #include "cache_aligned_allocator.h" #include "profiling.h" #include #include #include // memcpy #include // std::ptrdiff_t #include "task.h" // for task::suspend_point #if _WIN32 || _WIN64 #include #else #include #endif namespace tbb { namespace detail { namespace d1 { //! 
enum for selecting between single key and key-per-instance versions enum ets_key_usage_type { ets_key_per_instance , ets_no_key #if __TBB_RESUMABLE_TASKS , ets_suspend_aware #endif }; // Forward declaration to use in internal classes template class enumerable_thread_specific; template struct internal_ets_key_selector { using key_type = std::thread::id; static key_type current_key() { return std::this_thread::get_id(); } }; // Intel Compiler on OSX cannot create atomics objects that instantiated from non-fundamental types #if __INTEL_COMPILER && __APPLE__ template<> struct internal_ets_key_selector { using key_type = std::size_t; static key_type current_key() { auto id = std::this_thread::get_id(); return reinterpret_cast(id); } }; #endif template struct ets_key_selector : internal_ets_key_selector {}; #if __TBB_RESUMABLE_TASKS template <> struct ets_key_selector { using key_type = suspend_point; static key_type current_key() { return r1::current_suspend_point(); } }; #endif template class ets_base : detail::no_copy { protected: using key_type = typename ets_key_selector::key_type; public: struct slot; struct array { array* next; std::size_t lg_size; slot& at( std::size_t k ) { return (reinterpret_cast(reinterpret_cast(this+1)))[k]; } std::size_t size() const { return std::size_t(1) << lg_size; } std::size_t mask() const { return size() - 1; } std::size_t start( std::size_t h ) const { return h >> (8 * sizeof(std::size_t) - lg_size); } }; struct slot { std::atomic key; void* ptr; bool empty() const { return key.load(std::memory_order_relaxed) == key_type(); } bool match( key_type k ) const { return key.load(std::memory_order_relaxed) == k; } bool claim( key_type k ) { // TODO: maybe claim ptr, because key_type is not guaranteed to fit into word size key_type expected = key_type(); return key.compare_exchange_strong(expected, k); } }; protected: //! Root of linked list of arrays of decreasing size. /** nullptr if and only if my_count==0. 
Each array in the list is half the size of its predecessor. */ std::atomic my_root; std::atomic my_count; virtual void* create_local() = 0; virtual void* create_array(std::size_t _size) = 0; // _size in bytes virtual void free_array(void* ptr, std::size_t _size) = 0; // _size in bytes array* allocate( std::size_t lg_size ) { std::size_t n = std::size_t(1) << lg_size; array* a = static_cast(create_array(sizeof(array) + n * sizeof(slot))); a->lg_size = lg_size; std::memset( a + 1, 0, n * sizeof(slot) ); return a; } void free(array* a) { std::size_t n = std::size_t(1) << (a->lg_size); free_array( static_cast(a), std::size_t(sizeof(array) + n * sizeof(slot)) ); } ets_base() : my_root{nullptr}, my_count{0} {} virtual ~ets_base(); // g++ complains if this is not virtual void* table_lookup( bool& exists ); void table_clear(); // The following functions are not used in concurrent context, // so we don't need synchronization and ITT annotations there. template void table_elementwise_copy( const ets_base& other, void*(*add_element)(ets_base&, void*) ) { __TBB_ASSERT(!my_root.load(std::memory_order_relaxed),NULL); __TBB_ASSERT(!my_count.load(std::memory_order_relaxed),NULL); if( !other.my_root.load(std::memory_order_relaxed) ) return; array* root = allocate(other.my_root.load(std::memory_order_relaxed)->lg_size); my_root.store(root, std::memory_order_relaxed); root->next = nullptr; my_count.store(other.my_count.load(std::memory_order_relaxed), std::memory_order_relaxed); std::size_t mask = root->mask(); for( array* r = other.my_root.load(std::memory_order_relaxed); r; r = r->next ) { for( std::size_t i = 0; i < r->size(); ++i ) { slot& s1 = r->at(i); if( !s1.empty() ) { for( std::size_t j = root->start(std::hash{}(s1.key.load(std::memory_order_relaxed))); ; j = (j+1)&mask ) { slot& s2 = root->at(j); if( s2.empty() ) { s2.ptr = add_element(static_cast&>(*this), s1.ptr); s2.key.store(s1.key.load(std::memory_order_relaxed), std::memory_order_relaxed); break; } else if( 
s2.match(s1.key.load(std::memory_order_relaxed)) ) break; } } } } } void table_swap( ets_base& other ) { __TBB_ASSERT(this!=&other, "Don't swap an instance with itself"); swap_atomics_relaxed(my_root, other.my_root); swap_atomics_relaxed(my_count, other.my_count); } }; template ets_base::~ets_base() { __TBB_ASSERT(!my_root.load(std::memory_order_relaxed), nullptr); } template void ets_base::table_clear() { while ( array* r = my_root.load(std::memory_order_relaxed) ) { my_root.store(r->next, std::memory_order_relaxed); free(r); } my_count.store(0, std::memory_order_relaxed); } template void* ets_base::table_lookup( bool& exists ) { const key_type k = ets_key_selector::current_key(); __TBB_ASSERT(k != key_type(),NULL); void* found; std::size_t h = std::hash{}(k); for( array* r = my_root.load(std::memory_order_acquire); r; r = r->next ) { call_itt_notify(acquired,r); std::size_t mask=r->mask(); for(std::size_t i = r->start(h); ;i=(i+1)&mask) { slot& s = r->at(i); if( s.empty() ) break; if( s.match(k) ) { if( r == my_root.load(std::memory_order_acquire) ) { // Success at top level exists = true; return s.ptr; } else { // Success at some other level. Need to insert at top level. exists = true; found = s.ptr; goto insert; } } } } // Key does not yet exist. The density of slots in the table does not exceed 0.5, // for if this will occur a new table is allocated with double the current table // size, which is swapped in as the new root table. So an empty slot is guaranteed. exists = false; found = create_local(); { std::size_t c = ++my_count; array* r = my_root.load(std::memory_order_acquire); call_itt_notify(acquired,r); if( !r || c > r->size()/2 ) { std::size_t s = r ? 
r->lg_size : 2; while( c > std::size_t(1)<<(s-1) ) ++s; array* a = allocate(s); for(;;) { a->next = r; call_itt_notify(releasing,a); array* new_r = r; if( my_root.compare_exchange_strong(new_r, a) ) break; call_itt_notify(acquired, new_r); __TBB_ASSERT(new_r != nullptr, nullptr); if( new_r->lg_size >= s ) { // Another thread inserted an equal or bigger array, so our array is superfluous. free(a); break; } r = new_r; } } } insert: // Whether a slot has been found in an older table, or if it has been inserted at this level, // it has already been accounted for in the total. Guaranteed to be room for it, and it is // not present, so search for empty slot and use it. array* ir = my_root.load(std::memory_order_acquire); call_itt_notify(acquired, ir); std::size_t mask = ir->mask(); for(std::size_t i = ir->start(h);; i = (i+1)&mask) { slot& s = ir->at(i); if( s.empty() ) { if( s.claim(k) ) { s.ptr = found; return found; } } } } //! Specialization that exploits native TLS template <> class ets_base: public ets_base { using super = ets_base; #if _WIN32||_WIN64 #if __TBB_WIN8UI_SUPPORT using tls_key_t = DWORD; void create_key() { my_key = FlsAlloc(NULL); } void destroy_key() { FlsFree(my_key); } void set_tls(void * value) { FlsSetValue(my_key, (LPVOID)value); } void* get_tls() { return (void *)FlsGetValue(my_key); } #else using tls_key_t = DWORD; void create_key() { my_key = TlsAlloc(); } void destroy_key() { TlsFree(my_key); } void set_tls(void * value) { TlsSetValue(my_key, (LPVOID)value); } void* get_tls() { return (void *)TlsGetValue(my_key); } #endif #else using tls_key_t = pthread_key_t; void create_key() { pthread_key_create(&my_key, NULL); } void destroy_key() { pthread_key_delete(my_key); } void set_tls( void * value ) const { pthread_setspecific(my_key, value); } void* get_tls() const { return pthread_getspecific(my_key); } #endif tls_key_t my_key; virtual void* create_local() override = 0; virtual void* create_array(std::size_t _size) override = 0; // _size in 
bytes virtual void free_array(void* ptr, std::size_t _size) override = 0; // size in bytes protected: ets_base() {create_key();} ~ets_base() {destroy_key();} void* table_lookup( bool& exists ) { void* found = get_tls(); if( found ) { exists=true; } else { found = super::table_lookup(exists); set_tls(found); } return found; } void table_clear() { destroy_key(); create_key(); super::table_clear(); } void table_swap( ets_base& other ) { using std::swap; __TBB_ASSERT(this!=&other, "Don't swap an instance with itself"); swap(my_key, other.my_key); super::table_swap(other); } }; //! Random access iterator for traversing the thread local copies. template< typename Container, typename Value > class enumerable_thread_specific_iterator { //! current position in the concurrent_vector Container *my_container; typename Container::size_type my_index; mutable Value *my_value; template friend bool operator==( const enumerable_thread_specific_iterator& i, const enumerable_thread_specific_iterator& j ); template friend bool operator<( const enumerable_thread_specific_iterator& i, const enumerable_thread_specific_iterator& j ); template friend std::ptrdiff_t operator-( const enumerable_thread_specific_iterator& i, const enumerable_thread_specific_iterator& j ); template friend class enumerable_thread_specific_iterator; public: //! STL support using difference_type = std::ptrdiff_t; using value_type = Value; using pointer = Value*; using reference = Value&; using iterator_category = std::random_access_iterator_tag; enumerable_thread_specific_iterator( const Container &container, typename Container::size_type index ) : my_container(&const_cast(container)), my_index(index), my_value(nullptr) {} //! 
Default constructor enumerable_thread_specific_iterator() : my_container(nullptr), my_index(0), my_value(nullptr) {} template enumerable_thread_specific_iterator( const enumerable_thread_specific_iterator& other ) : my_container( other.my_container ), my_index( other.my_index), my_value( const_cast(other.my_value) ) {} enumerable_thread_specific_iterator operator+( std::ptrdiff_t offset ) const { return enumerable_thread_specific_iterator(*my_container, my_index + offset); } friend enumerable_thread_specific_iterator operator+( std::ptrdiff_t offset, enumerable_thread_specific_iterator v ) { return enumerable_thread_specific_iterator(*v.my_container, v.my_index + offset); } enumerable_thread_specific_iterator &operator+=( std::ptrdiff_t offset ) { my_index += offset; my_value = nullptr; return *this; } enumerable_thread_specific_iterator operator-( std::ptrdiff_t offset ) const { return enumerable_thread_specific_iterator( *my_container, my_index-offset ); } enumerable_thread_specific_iterator &operator-=( std::ptrdiff_t offset ) { my_index -= offset; my_value = nullptr; return *this; } Value& operator*() const { Value* value = my_value; if( !value ) { value = my_value = (*my_container)[my_index].value(); } __TBB_ASSERT( value==(*my_container)[my_index].value(), "corrupt cache" ); return *value; } Value& operator[]( std::ptrdiff_t k ) const { return *(*my_container)[my_index + k].value(); } Value* operator->() const {return &operator*();} enumerable_thread_specific_iterator& operator++() { ++my_index; my_value = nullptr; return *this; } enumerable_thread_specific_iterator& operator--() { --my_index; my_value = nullptr; return *this; } //! Post increment enumerable_thread_specific_iterator operator++(int) { enumerable_thread_specific_iterator result = *this; ++my_index; my_value = nullptr; return result; } //! 
Post decrement enumerable_thread_specific_iterator operator--(int) { enumerable_thread_specific_iterator result = *this; --my_index; my_value = nullptr; return result; } }; template bool operator==( const enumerable_thread_specific_iterator& i, const enumerable_thread_specific_iterator& j ) { return i.my_index == j.my_index && i.my_container == j.my_container; } template bool operator!=( const enumerable_thread_specific_iterator& i, const enumerable_thread_specific_iterator& j ) { return !(i==j); } template bool operator<( const enumerable_thread_specific_iterator& i, const enumerable_thread_specific_iterator& j ) { return i.my_index bool operator>( const enumerable_thread_specific_iterator& i, const enumerable_thread_specific_iterator& j ) { return j bool operator>=( const enumerable_thread_specific_iterator& i, const enumerable_thread_specific_iterator& j ) { return !(i bool operator<=( const enumerable_thread_specific_iterator& i, const enumerable_thread_specific_iterator& j ) { return !(j std::ptrdiff_t operator-( const enumerable_thread_specific_iterator& i, const enumerable_thread_specific_iterator& j ) { return i.my_index-j.my_index; } template class segmented_iterator { template friend bool operator==(const segmented_iterator& i, const segmented_iterator& j); template friend bool operator!=(const segmented_iterator& i, const segmented_iterator& j); template friend class segmented_iterator; public: segmented_iterator() {my_segcont = nullptr;} segmented_iterator( const SegmentedContainer& _segmented_container ) : my_segcont(const_cast(&_segmented_container)), outer_iter(my_segcont->end()) { } ~segmented_iterator() {} using InnerContainer = typename SegmentedContainer::value_type; using inner_iterator = typename InnerContainer::iterator; using outer_iterator = typename SegmentedContainer::iterator; // STL support // TODO: inherit all types from segmented container? 
using difference_type = std::ptrdiff_t; using value_type = Value; using size_type = typename SegmentedContainer::size_type; using pointer = Value*; using reference = Value&; using iterator_category = std::input_iterator_tag; // Copy Constructor template segmented_iterator(const segmented_iterator& other) : my_segcont(other.my_segcont), outer_iter(other.outer_iter), // can we assign a default-constructed iterator to inner if we're at the end? inner_iter(other.inner_iter) {} // assignment template segmented_iterator& operator=( const segmented_iterator& other) { my_segcont = other.my_segcont; outer_iter = other.outer_iter; if(outer_iter != my_segcont->end()) inner_iter = other.inner_iter; return *this; } // allow assignment of outer iterator to segmented iterator. Once it is // assigned, move forward until a non-empty inner container is found or // the end of the outer container is reached. segmented_iterator& operator=(const outer_iterator& new_outer_iter) { __TBB_ASSERT(my_segcont != nullptr, NULL); // check that this iterator points to something inside the segmented container for(outer_iter = new_outer_iter ;outer_iter!=my_segcont->end(); ++outer_iter) { if( !outer_iter->empty() ) { inner_iter = outer_iter->begin(); break; } } return *this; } // pre-increment segmented_iterator& operator++() { advance_me(); return *this; } // post-increment segmented_iterator operator++(int) { segmented_iterator tmp = *this; operator++(); return tmp; } bool operator==(const outer_iterator& other_outer) const { __TBB_ASSERT(my_segcont != nullptr, NULL); return (outer_iter == other_outer && (outer_iter == my_segcont->end() || inner_iter == outer_iter->begin())); } bool operator!=(const outer_iterator& other_outer) const { return !operator==(other_outer); } // (i)* RHS reference operator*() const { __TBB_ASSERT(my_segcont != nullptr, NULL); __TBB_ASSERT(outer_iter != my_segcont->end(), "Dereferencing a pointer at end of container"); __TBB_ASSERT(inner_iter != outer_iter->end(), 
NULL); // should never happen return *inner_iter; } // i-> pointer operator->() const { return &operator*();} private: SegmentedContainer* my_segcont; outer_iterator outer_iter; inner_iterator inner_iter; void advance_me() { __TBB_ASSERT(my_segcont != nullptr, NULL); __TBB_ASSERT(outer_iter != my_segcont->end(), NULL); // not true if there are no inner containers __TBB_ASSERT(inner_iter != outer_iter->end(), NULL); // not true if the inner containers are all empty. ++inner_iter; while(inner_iter == outer_iter->end() && ++outer_iter != my_segcont->end()) { inner_iter = outer_iter->begin(); } } }; // segmented_iterator template bool operator==( const segmented_iterator& i, const segmented_iterator& j ) { if(i.my_segcont != j.my_segcont) return false; if(i.my_segcont == nullptr) return true; if(i.outer_iter != j.outer_iter) return false; if(i.outer_iter == i.my_segcont->end()) return true; return i.inner_iter == j.inner_iter; } // != template bool operator!=( const segmented_iterator& i, const segmented_iterator& j ) { return !(i==j); } template struct construct_by_default: no_assign { void construct(void*where) {new(where) T();} // C++ note: the () in T() ensure zero initialization. construct_by_default( int ) {} }; template struct construct_by_exemplar: no_assign { const T exemplar; void construct(void*where) {new(where) T(exemplar);} construct_by_exemplar( const T& t ) : exemplar(t) {} construct_by_exemplar( T&& t ) : exemplar(std::move(t)) {} }; template struct construct_by_finit: no_assign { Finit f; void construct(void* where) {new(where) T(f());} construct_by_finit( Finit&& f_ ) : f(std::move(f_)) {} }; template struct construct_by_args: no_assign { stored_pack pack; void construct(void* where) { call( [where](const typename std::decay

::type&... args ){ new(where) T(args...); }, pack ); } construct_by_args( P&& ... args ) : pack(std::forward

(args)...) {} }; // storage for initialization function pointer // TODO: consider removing the template parameter T here and in callback_leaf class callback_base { public: // Clone *this virtual callback_base* clone() const = 0; // Destruct and free *this virtual void destroy() = 0; // Need virtual destructor to satisfy GCC compiler warning virtual ~callback_base() { } // Construct T at where virtual void construct(void* where) = 0; }; template class callback_leaf: public callback_base, Constructor { template callback_leaf( P&& ... params ) : Constructor(std::forward

(params)...) {} // TODO: make the construction/destruction consistent (use allocator.construct/destroy) using my_allocator_type = typename tbb::tbb_allocator; callback_base* clone() const override { return make(*this); } void destroy() override { my_allocator_type alloc; tbb::detail::allocator_traits::destroy(alloc, this); tbb::detail::allocator_traits::deallocate(alloc, this, 1); } void construct(void* where) override { Constructor::construct(where); } public: template static callback_base* make( P&& ... params ) { void* where = my_allocator_type().allocate(1); return new(where) callback_leaf( std::forward

(params)... ); } }; //! Template for recording construction of objects in table /** All maintenance of the space will be done explicitly on push_back, and all thread local copies must be destroyed before the concurrent vector is deleted. The flag is_built is initialized to false. When the local is successfully-constructed, set the flag to true or call value_committed(). If the constructor throws, the flag will be false. */ template struct ets_element { detail::aligned_space my_space; bool is_built; ets_element() { is_built = false; } // not currently-built U* value() { return my_space.begin(); } U* value_committed() { is_built = true; return my_space.begin(); } ~ets_element() { if(is_built) { my_space.begin()->~U(); is_built = false; } } }; // A predicate that can be used for a compile-time compatibility check of ETS instances // Ideally, it should have been declared inside the ETS class, but unfortunately // in that case VS2013 does not enable the variadic constructor. template struct is_compatible_ets : std::false_type {}; template struct is_compatible_ets< T, enumerable_thread_specific > : std::is_same {}; // A predicate that checks whether, for a variable 'foo' of type T, foo() is a valid expression template using has_empty_braces_operator = decltype(std::declval()()); template using is_callable_no_args = supports; //! The enumerable_thread_specific container /** enumerable_thread_specific has the following properties: - thread-local copies are lazily created, with default, exemplar or function initialization. - thread-local copies do not move (during lifetime, and excepting clear()) so the address of a copy is invariant. - the contained objects need not have operator=() defined if combine is not used. - enumerable_thread_specific containers may be copy-constructed or assigned. - thread-local copies can be managed by hash-table, or can be accessed via TLS storage for speed. 
- outside of parallel contexts, the contents of all thread-local copies are accessible by iterator or using combine or combine_each methods @par Segmented iterator When the thread-local objects are containers with input_iterators defined, a segmented iterator may be used to iterate over all the elements of all thread-local copies. @par combine and combine_each - Both methods are defined for enumerable_thread_specific. - combine() requires the type T have operator=() defined. - neither method modifies the contents of the object (though there is no guarantee that the applied methods do not modify the object.) - Both are evaluated in serial context (the methods are assumed to be non-benign.) @ingroup containers */ template , ets_key_usage_type ETS_key_type=ets_no_key > class enumerable_thread_specific: ets_base { template friend class enumerable_thread_specific; using padded_element = padded>; //! A generic range, used to create range objects from the iterators template class generic_range_type: public blocked_range { public: using value_type = T; using reference = T&; using const_reference = const T&; using iterator = I; using difference_type = std::ptrdiff_t; generic_range_type( I begin_, I end_, std::size_t grainsize_ = 1) : blocked_range(begin_,end_,grainsize_) {} template generic_range_type( const generic_range_type& r) : blocked_range(r.begin(),r.end(),r.grainsize()) {} generic_range_type( generic_range_type& r, split ) : blocked_range(r,split()) {} }; using allocator_traits_type = tbb::detail::allocator_traits; using padded_allocator_type = typename allocator_traits_type::template rebind_alloc; using internal_collection_type = tbb::concurrent_vector< padded_element, padded_allocator_type >; callback_base *my_construct_callback; internal_collection_type my_locals; // TODO: consider unifying the callback mechanism for all create_local* methods below // (likely non-compatible and requires interface version increase) void* create_local() override { padded_element& 
lref = *my_locals.grow_by(1); my_construct_callback->construct(lref.value()); return lref.value_committed(); } static void* create_local_by_copy( ets_base& base, void* p ) { enumerable_thread_specific& ets = static_cast(base); padded_element& lref = *ets.my_locals.grow_by(1); new(lref.value()) T(*static_cast(p)); return lref.value_committed(); } static void* create_local_by_move( ets_base& base, void* p ) { enumerable_thread_specific& ets = static_cast(base); padded_element& lref = *ets.my_locals.grow_by(1); new(lref.value()) T(std::move(*static_cast(p))); return lref.value_committed(); } using array_allocator_type = typename allocator_traits_type::template rebind_alloc; // _size is in bytes void* create_array(std::size_t _size) override { std::size_t nelements = (_size + sizeof(uintptr_t) -1) / sizeof(uintptr_t); return array_allocator_type().allocate(nelements); } void free_array( void* _ptr, std::size_t _size) override { std::size_t nelements = (_size + sizeof(uintptr_t) -1) / sizeof(uintptr_t); array_allocator_type().deallocate( reinterpret_cast(_ptr),nelements); } public: //! Basic types using value_type = T; using allocator_type = Allocator; using size_type = typename internal_collection_type::size_type; using difference_type = typename internal_collection_type::difference_type; using reference = value_type&; using const_reference = const value_type&; using pointer = typename allocator_traits_type::pointer; using const_pointer = typename allocator_traits_type::const_pointer; // Iterator types using iterator = enumerable_thread_specific_iterator; using const_iterator = enumerable_thread_specific_iterator; // Parallel range types using range_type = generic_range_type; using const_range_type = generic_range_type; //! Default constructor. Each local instance of T is default constructed. enumerable_thread_specific() : my_construct_callback( callback_leaf >::make(/*dummy argument*/0) ){} //! Constructor with initializer functor. 
Each local instance of T is constructed by T(finit()). template ::type>::value>::type> explicit enumerable_thread_specific( Finit finit ) : my_construct_callback( callback_leaf >::make( std::move(finit) ) ){} //! Constructor with exemplar. Each local instance of T is copy-constructed from the exemplar. explicit enumerable_thread_specific( const T& exemplar ) : my_construct_callback( callback_leaf >::make( exemplar ) ){} explicit enumerable_thread_specific( T&& exemplar ) : my_construct_callback( callback_leaf >::make( std::move(exemplar) ) ){} //! Variadic constructor with initializer arguments. Each local instance of T is constructed by T(args...) template ::type>::value && !is_compatible_ets::type>::value && !std::is_same::type>::value >::type> enumerable_thread_specific( P1&& arg1, P&& ... args ) : my_construct_callback( callback_leaf >::make( std::forward(arg1), std::forward

(args)... ) ){} //! Destructor ~enumerable_thread_specific() { if(my_construct_callback) my_construct_callback->destroy(); // Deallocate the hash table before overridden free_array() becomes inaccessible this->ets_base::table_clear(); } //! returns reference to local, discarding exists reference local() { bool exists; return local(exists); } //! Returns reference to calling thread's local copy, creating one if necessary reference local(bool& exists) { void* ptr = this->table_lookup(exists); return *(T*)ptr; } //! Get the number of local copies size_type size() const { return my_locals.size(); } //! true if there have been no local copies created bool empty() const { return my_locals.empty(); } //! begin iterator iterator begin() { return iterator( my_locals, 0 ); } //! end iterator iterator end() { return iterator(my_locals, my_locals.size() ); } //! begin const iterator const_iterator begin() const { return const_iterator(my_locals, 0); } //! end const iterator const_iterator end() const { return const_iterator(my_locals, my_locals.size()); } //! Get range for parallel algorithms range_type range( std::size_t grainsize=1 ) { return range_type( begin(), end(), grainsize ); } //! Get const range for parallel algorithms const_range_type range( std::size_t grainsize=1 ) const { return const_range_type( begin(), end(), grainsize ); } //! Destroys local copies void clear() { my_locals.clear(); this->table_clear(); // callback is not destroyed } private: template void internal_copy(const enumerable_thread_specific& other) { // this tests is_compatible_ets static_assert( (is_compatible_ets::type>::value), "is_compatible_ets fails" ); // Initialize my_construct_callback first, so that it is valid even if rest of this routine throws an exception. 
my_construct_callback = other.my_construct_callback->clone(); __TBB_ASSERT(my_locals.size()==0,NULL); my_locals.reserve(other.size()); this->table_elementwise_copy( other, create_local_by_copy ); } void internal_swap(enumerable_thread_specific& other) { using std::swap; __TBB_ASSERT( this!=&other, NULL ); swap(my_construct_callback, other.my_construct_callback); // concurrent_vector::swap() preserves storage space, // so addresses to the vector kept in ETS hash table remain valid. swap(my_locals, other.my_locals); this->ets_base::table_swap(other); } template void internal_move(enumerable_thread_specific&& other) { static_assert( (is_compatible_ets::type>::value), "is_compatible_ets fails" ); my_construct_callback = other.my_construct_callback; other.my_construct_callback = nullptr; __TBB_ASSERT(my_locals.size()==0,NULL); my_locals.reserve(other.size()); this->table_elementwise_copy( other, create_local_by_move ); } public: enumerable_thread_specific( const enumerable_thread_specific& other ) : ets_base() /* prevents GCC warnings with -Wextra */ { internal_copy(other); } template enumerable_thread_specific( const enumerable_thread_specific& other ) { internal_copy(other); } enumerable_thread_specific( enumerable_thread_specific&& other ) : my_construct_callback() { // TODO: use internal_move correctly here internal_swap(other); } template enumerable_thread_specific( enumerable_thread_specific&& other ) : my_construct_callback() { internal_move(std::move(other)); } enumerable_thread_specific& operator=( const enumerable_thread_specific& other ) { if( this != &other ) { this->clear(); my_construct_callback->destroy(); internal_copy( other ); } return *this; } template enumerable_thread_specific& operator=( const enumerable_thread_specific& other ) { __TBB_ASSERT( static_cast(this)!=static_cast(&other), NULL ); // Objects of different types this->clear(); my_construct_callback->destroy(); internal_copy(other); return *this; } enumerable_thread_specific& operator=( 
enumerable_thread_specific&& other ) { if( this != &other ) { // TODO: use internal_move correctly here internal_swap(other); } return *this; } template enumerable_thread_specific& operator=( enumerable_thread_specific&& other ) { __TBB_ASSERT( static_cast(this)!=static_cast(&other), NULL ); // Objects of different types this->clear(); my_construct_callback->destroy(); internal_move(std::move(other)); return *this; } // CombineFunc has signature T(T,T) or T(const T&, const T&) template T combine(CombineFunc f_combine) { if(begin() == end()) { ets_element location; my_construct_callback->construct(location.value()); return *location.value_committed(); } const_iterator ci = begin(); T my_result = *ci; while(++ci != end()) my_result = f_combine( my_result, *ci ); return my_result; } // combine_func_t takes T by value or by [const] reference, and returns nothing template void combine_each(CombineFunc f_combine) { for(iterator ci = begin(); ci != end(); ++ci) { f_combine( *ci ); } } }; // enumerable_thread_specific template< typename Container > class flattened2d { // This intermediate typedef is to address issues with VC7.1 compilers using conval_type = typename Container::value_type; public: //! 
Basic types using size_type = typename conval_type::size_type; using difference_type = typename conval_type::difference_type; using allocator_type = typename conval_type::allocator_type; using value_type = typename conval_type::value_type; using reference = typename conval_type::reference; using const_reference = typename conval_type::const_reference; using pointer = typename conval_type::pointer; using const_pointer = typename conval_type::const_pointer; using iterator = segmented_iterator; using const_iterator = segmented_iterator; flattened2d( const Container &c, typename Container::const_iterator b, typename Container::const_iterator e ) : my_container(const_cast(&c)), my_begin(b), my_end(e) { } explicit flattened2d( const Container &c ) : my_container(const_cast(&c)), my_begin(c.begin()), my_end(c.end()) { } iterator begin() { return iterator(*my_container) = my_begin; } iterator end() { return iterator(*my_container) = my_end; } const_iterator begin() const { return const_iterator(*my_container) = my_begin; } const_iterator end() const { return const_iterator(*my_container) = my_end; } size_type size() const { size_type tot_size = 0; for(typename Container::const_iterator i = my_begin; i != my_end; ++i) { tot_size += i->size(); } return tot_size; } private: Container *my_container; typename Container::const_iterator my_begin; typename Container::const_iterator my_end; }; template flattened2d flatten2d(const Container &c, const typename Container::const_iterator b, const typename Container::const_iterator e) { return flattened2d(c, b, e); } template flattened2d flatten2d(const Container &c) { return flattened2d(c); } } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::enumerable_thread_specific; using detail::d1::flattened2d; using detail::d1::flatten2d; // ets enum keys using detail::d1::ets_key_usage_type; using detail::d1::ets_key_per_instance; using detail::d1::ets_no_key; #if __TBB_RESUMABLE_TASKS using 
detail::d1::ets_suspend_aware; #endif } // inline namespace v1 } // namespace tbb #endif // __TBB_enumerable_thread_specific_H level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/flow_graph.h000066400000000000000000003751561514453371700311220ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_flow_graph_H #define __TBB_flow_graph_H #include #include #include #include "detail/_config.h" #include "detail/_namespace_injection.h" #include "spin_mutex.h" #include "null_mutex.h" #include "spin_rw_mutex.h" #include "null_rw_mutex.h" #include "detail/_pipeline_filters.h" #include "detail/_task.h" #include "detail/_small_object_pool.h" #include "cache_aligned_allocator.h" #include "detail/_exception.h" #include "detail/_template_helpers.h" #include "detail/_aggregator.h" #include "detail/_allocator_traits.h" #include "detail/_utils.h" #include "profiling.h" #include "task_arena.h" #if TBB_USE_PROFILING_TOOLS && ( __unix__ || __APPLE__ ) #if __INTEL_COMPILER // Disabled warning "routine is both inline and noinline" #pragma warning (push) #pragma warning( disable: 2196 ) #endif #define __TBB_NOINLINE_SYM __attribute__((noinline)) #else #define __TBB_NOINLINE_SYM #endif #include #include #include #if __TBB_CPP20_CONCEPTS_PRESENT #include #endif /** @file \brief The graph related classes and functions There are some applications that best express dependencies as messages passed between nodes in a graph. 
These messages may contain data or simply act as signals that a predecessors has completed. The graph class and its associated node classes can be used to express such applications. */ namespace tbb { namespace detail { namespace d1 { //! An enumeration the provides the two most common concurrency levels: unlimited and serial enum concurrency { unlimited = 0, serial = 1 }; //! A generic null type struct null_type {}; //! An empty class used for messages that mean "I'm done" class continue_msg {}; } // namespace d1 #if __TBB_CPP20_CONCEPTS_PRESENT namespace d0 { template concept node_body_return_type = std::same_as || std::same_as; template concept continue_node_body = std::copy_constructible && requires( Body& body, const tbb::detail::d1::continue_msg& v ) { { body(v) } -> node_body_return_type; }; template concept function_node_body = std::copy_constructible && requires( Body& body, const Input& v ) { { body(v) } -> node_body_return_type; }; template concept join_node_function_object = std::copy_constructible && requires( FunctionObject& func, const Input& v ) { { func(v) } -> adaptive_same_as; }; template concept input_node_body = std::copy_constructible && requires( Body& body, tbb::detail::d1::flow_control& fc ) { { body(fc) } -> adaptive_same_as; }; template concept multifunction_node_body = std::copy_constructible && requires( Body& body, const Input& v, OutputPortsType& p ) { body(v, p); }; template concept sequencer = std::copy_constructible && requires( Sequencer& seq, const Value& value ) { { seq(value) } -> adaptive_same_as; }; template concept async_node_body = std::copy_constructible && requires( Body& body, const Input& v, GatewayType& gateway ) { body(v, gateway); }; } // namespace d0 #endif // __TBB_CPP20_CONCEPTS_PRESENT namespace d1 { //! 
Forward declaration section template< typename T > class sender; template< typename T > class receiver; class continue_receiver; template< typename T, typename U > class limiter_node; // needed for resetting decrementer template class successor_cache; template class broadcast_cache; template class round_robin_cache; template class predecessor_cache; template class reservable_predecessor_cache; #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET namespace order { struct following; struct preceding; } template struct node_set; #endif } // namespace d1 } // namespace detail } // namespace tbb //! The graph class #include "detail/_flow_graph_impl.h" namespace tbb { namespace detail { namespace d1 { static inline std::pair order_tasks(graph_task* first, graph_task* second) { if (second->priority > first->priority) return std::make_pair(second, first); return std::make_pair(first, second); } // submit task if necessary. Returns the non-enqueued task if there is one. static inline graph_task* combine_tasks(graph& g, graph_task* left, graph_task* right) { // if no RHS task, don't change left. if (right == NULL) return left; // right != NULL if (left == NULL) return right; if (left == SUCCESSFULLY_ENQUEUED) return right; // left contains a task if (right != SUCCESSFULLY_ENQUEUED) { // both are valid tasks auto tasks_pair = order_tasks(left, right); spawn_in_graph_arena(g, *tasks_pair.first); return tasks_pair.second; } return left; } //! Pure virtual template class that defines a sender of messages of type T template< typename T > class sender { public: virtual ~sender() {} //! Request an item from the sender virtual bool try_get( T & ) { return false; } //! Reserves an item in the sender virtual bool try_reserve( T & ) { return false; } //! Releases the reserved item virtual bool try_release( ) { return false; } //! Consumes the reserved item virtual bool try_consume( ) { return false; } protected: //! The output type of this sender typedef T output_type; //! 
The successor type for this node typedef receiver successor_type; //! Add a new successor to this node virtual bool register_successor( successor_type &r ) = 0; //! Removes a successor from this node virtual bool remove_successor( successor_type &r ) = 0; template friend bool register_successor(sender& s, receiver& r); template friend bool remove_successor (sender& s, receiver& r); }; // class sender template bool register_successor(sender& s, receiver& r) { return s.register_successor(r); } template bool remove_successor(sender& s, receiver& r) { return s.remove_successor(r); } //! Pure virtual template class that defines a receiver of messages of type T template< typename T > class receiver { public: //! Destructor virtual ~receiver() {} //! Put an item to the receiver bool try_put( const T& t ) { graph_task *res = try_put_task(t); if (!res) return false; if (res != SUCCESSFULLY_ENQUEUED) spawn_in_graph_arena(graph_reference(), *res); return true; } //! put item to successor; return task to run the successor if possible. protected: //! The input type of this receiver typedef T input_type; //! The predecessor type for this node typedef sender predecessor_type; template< typename R, typename B > friend class run_and_put_task; template< typename X, typename Y > friend class broadcast_cache; template< typename X, typename Y > friend class round_robin_cache; virtual graph_task *try_put_task(const T& t) = 0; virtual graph& graph_reference() const = 0; template friend class successor_cache; virtual bool is_continue_receiver() { return false; } // TODO revamp: reconsider the inheritance and move node priority out of receiver virtual node_priority_t priority() const { return no_priority; } //! Add a predecessor to the node virtual bool register_predecessor( predecessor_type & ) { return false; } //! 
Remove a predecessor from the node virtual bool remove_predecessor( predecessor_type & ) { return false; } template friend bool register_predecessor(receiver& r, sender& s); template friend bool remove_predecessor (receiver& r, sender& s); }; // class receiver template bool register_predecessor(receiver& r, sender& s) { return r.register_predecessor(s); } template bool remove_predecessor(receiver& r, sender& s) { return r.remove_predecessor(s); } //! Base class for receivers of completion messages /** These receivers automatically reset, but cannot be explicitly waited on */ class continue_receiver : public receiver< continue_msg > { protected: //! Constructor explicit continue_receiver( int number_of_predecessors, node_priority_t a_priority ) { my_predecessor_count = my_initial_predecessor_count = number_of_predecessors; my_current_count = 0; my_priority = a_priority; } //! Copy constructor continue_receiver( const continue_receiver& src ) : receiver() { my_predecessor_count = my_initial_predecessor_count = src.my_initial_predecessor_count; my_current_count = 0; my_priority = src.my_priority; } //! Increments the trigger threshold bool register_predecessor( predecessor_type & ) override { spin_mutex::scoped_lock l(my_mutex); ++my_predecessor_count; return true; } //! Decrements the trigger threshold /** Does not check to see if the removal of the predecessor now makes the current count exceed the new threshold. So removing a predecessor while the graph is active can cause unexpected results. */ bool remove_predecessor( predecessor_type & ) override { spin_mutex::scoped_lock l(my_mutex); --my_predecessor_count; return true; } //! The input type typedef continue_msg input_type; //! 
The predecessor type for this node typedef receiver::predecessor_type predecessor_type; template< typename R, typename B > friend class run_and_put_task; template friend class broadcast_cache; template friend class round_robin_cache; // execute body is supposed to be too small to create a task for. graph_task* try_put_task( const input_type & ) override { { spin_mutex::scoped_lock l(my_mutex); if ( ++my_current_count < my_predecessor_count ) return SUCCESSFULLY_ENQUEUED; else my_current_count = 0; } graph_task* res = execute(); return res? res : SUCCESSFULLY_ENQUEUED; } spin_mutex my_mutex; int my_predecessor_count; int my_current_count; int my_initial_predecessor_count; node_priority_t my_priority; // the friend declaration in the base class did not eliminate the "protected class" // error in gcc 4.1.2 template friend class limiter_node; virtual void reset_receiver( reset_flags f ) { my_current_count = 0; if (f & rf_clear_edges) { my_predecessor_count = my_initial_predecessor_count; } } //! Does whatever should happen when the threshold is reached /** This should be very fast or else spawn a task. This is called while the sender is blocked in the try_put(). 
*/ virtual graph_task* execute() = 0; template friend class successor_cache; bool is_continue_receiver() override { return true; } node_priority_t priority() const override { return my_priority; } }; // class continue_receiver #if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING template K key_from_message( const T &t ) { return t.key(); } #endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ } // d1 } // detail } // tbb #include "detail/_flow_graph_trace_impl.h" #include "detail/_hash_compare.h" namespace tbb { namespace detail { namespace d1 { #include "detail/_flow_graph_body_impl.h" #include "detail/_flow_graph_cache_impl.h" #include "detail/_flow_graph_types_impl.h" using namespace graph_policy_namespace; template graph_iterator::graph_iterator(C *g, bool begin) : my_graph(g), current_node(NULL) { if (begin) current_node = my_graph->my_nodes; //else it is an end iterator by default } template typename graph_iterator::reference graph_iterator::operator*() const { __TBB_ASSERT(current_node, "graph_iterator at end"); return *operator->(); } template typename graph_iterator::pointer graph_iterator::operator->() const { return current_node; } template void graph_iterator::internal_forward() { if (current_node) current_node = current_node->next; } //! 
Constructs a graph with isolated task_group_context inline graph::graph() : my_wait_context(0), my_nodes(NULL), my_nodes_last(NULL), my_task_arena(NULL) { prepare_task_arena(); own_context = true; cancelled = false; caught_exception = false; my_context = new (r1::cache_aligned_allocate(sizeof(task_group_context))) task_group_context(FLOW_TASKS); fgt_graph(this); my_is_active = true; } inline graph::graph(task_group_context& use_this_context) : my_wait_context(0), my_context(&use_this_context), my_nodes(NULL), my_nodes_last(NULL), my_task_arena(NULL) { prepare_task_arena(); own_context = false; cancelled = false; caught_exception = false; fgt_graph(this); my_is_active = true; } inline graph::~graph() { wait_for_all(); if (own_context) { my_context->~task_group_context(); r1::cache_aligned_deallocate(my_context); } delete my_task_arena; } inline void graph::reserve_wait() { my_wait_context.reserve(); fgt_reserve_wait(this); } inline void graph::release_wait() { fgt_release_wait(this); my_wait_context.release(); } inline void graph::register_node(graph_node *n) { n->next = NULL; { spin_mutex::scoped_lock lock(nodelist_mutex); n->prev = my_nodes_last; if (my_nodes_last) my_nodes_last->next = n; my_nodes_last = n; if (!my_nodes) my_nodes = n; } } inline void graph::remove_node(graph_node *n) { { spin_mutex::scoped_lock lock(nodelist_mutex); __TBB_ASSERT(my_nodes && my_nodes_last, "graph::remove_node: Error: no registered nodes"); if (n->prev) n->prev->next = n->next; if (n->next) n->next->prev = n->prev; if (my_nodes_last == n) my_nodes_last = n->prev; if (my_nodes == n) my_nodes = n->next; } n->prev = n->next = NULL; } inline void graph::reset( reset_flags f ) { // reset context deactivate_graph(*this); my_context->reset(); cancelled = false; caught_exception = false; // reset all the nodes comprising the graph for(iterator ii = begin(); ii != end(); ++ii) { graph_node *my_p = &(*ii); my_p->reset_node(f); } // Reattach the arena. 
Might be useful to run the graph in a particular task_arena // while not limiting graph lifetime to a single task_arena::execute() call. prepare_task_arena( /*reinit=*/true ); activate_graph(*this); } inline void graph::cancel() { my_context->cancel_group_execution(); } inline graph::iterator graph::begin() { return iterator(this, true); } inline graph::iterator graph::end() { return iterator(this, false); } inline graph::const_iterator graph::begin() const { return const_iterator(this, true); } inline graph::const_iterator graph::end() const { return const_iterator(this, false); } inline graph::const_iterator graph::cbegin() const { return const_iterator(this, true); } inline graph::const_iterator graph::cend() const { return const_iterator(this, false); } inline graph_node::graph_node(graph& g) : my_graph(g) { my_graph.register_node(this); } inline graph_node::~graph_node() { my_graph.remove_node(this); } #include "detail/_flow_graph_node_impl.h" //! An executable node that acts as a source, i.e. it has no predecessors template < typename Output > __TBB_requires(std::copyable) class input_node : public graph_node, public sender< Output > { public: //! The type of the output message, which is complete typedef Output output_type; //! The type of successors of this node typedef typename sender::successor_type successor_type; // Input node has no input type typedef null_type input_type; //! 
Constructor for a node with a successor template< typename Body > __TBB_requires(input_node_body) __TBB_NOINLINE_SYM input_node( graph &g, Body body ) : graph_node(g), my_active(false) , my_body( new input_body_leaf< output_type, Body>(body) ) , my_init_body( new input_body_leaf< output_type, Body>(body) ) , my_successors(this), my_reserved(false), my_has_cached_item(false) { fgt_node_with_body(CODEPTR(), FLOW_INPUT_NODE, &this->my_graph, static_cast *>(this), this->my_body); } #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template __TBB_requires(input_node_body) input_node( const node_set& successors, Body body ) : input_node(successors.graph_reference(), body) { make_edges(*this, successors); } #endif //! Copy constructor __TBB_NOINLINE_SYM input_node( const input_node& src ) : graph_node(src.my_graph), sender() , my_active(false) , my_body(src.my_init_body->clone()), my_init_body(src.my_init_body->clone()) , my_successors(this), my_reserved(false), my_has_cached_item(false) { fgt_node_with_body(CODEPTR(), FLOW_INPUT_NODE, &this->my_graph, static_cast *>(this), this->my_body); } //! The destructor ~input_node() { delete my_body; delete my_init_body; } //! Add a new successor to this node bool register_successor( successor_type &r ) override { spin_mutex::scoped_lock lock(my_mutex); my_successors.register_successor(r); if ( my_active ) spawn_put(); return true; } //! Removes a successor from this node bool remove_successor( successor_type &r ) override { spin_mutex::scoped_lock lock(my_mutex); my_successors.remove_successor(r); return true; } //! Request an item from the node bool try_get( output_type &v ) override { spin_mutex::scoped_lock lock(my_mutex); if ( my_reserved ) return false; if ( my_has_cached_item ) { v = my_cached_item; my_has_cached_item = false; return true; } // we've been asked to provide an item, but we have none. enqueue a task to // provide one. if ( my_active ) spawn_put(); return false; } //! Reserves an item. 
bool try_reserve( output_type &v ) override { spin_mutex::scoped_lock lock(my_mutex); if ( my_reserved ) { return false; } if ( my_has_cached_item ) { v = my_cached_item; my_reserved = true; return true; } else { return false; } } //! Release a reserved item. /** true = item has been released and so remains in sender, dest must request or reserve future items */ bool try_release( ) override { spin_mutex::scoped_lock lock(my_mutex); __TBB_ASSERT( my_reserved && my_has_cached_item, "releasing non-existent reservation" ); my_reserved = false; if(!my_successors.empty()) spawn_put(); return true; } //! Consumes a reserved item bool try_consume( ) override { spin_mutex::scoped_lock lock(my_mutex); __TBB_ASSERT( my_reserved && my_has_cached_item, "consuming non-existent reservation" ); my_reserved = false; my_has_cached_item = false; if ( !my_successors.empty() ) { spawn_put(); } return true; } //! Activates a node that was created in the inactive state void activate() { spin_mutex::scoped_lock lock(my_mutex); my_active = true; if (!my_successors.empty()) spawn_put(); } template Body copy_function_object() { input_body &body_ref = *this->my_body; return dynamic_cast< input_body_leaf & >(body_ref).get_body(); } protected: //! resets the input_node to its initial state void reset_node( reset_flags f) override { my_active = false; my_reserved = false; my_has_cached_item = false; if(f & rf_clear_edges) my_successors.clear(); if(f & rf_reset_bodies) { input_body *tmp = my_init_body->clone(); delete my_body; my_body = tmp; } } private: spin_mutex my_mutex; bool my_active; input_body *my_body; input_body *my_init_body; broadcast_cache< output_type > my_successors; bool my_reserved; bool my_has_cached_item; output_type my_cached_item; // used by apply_body_bypass, can invoke body of node. 
bool try_reserve_apply_body(output_type &v) { spin_mutex::scoped_lock lock(my_mutex); if ( my_reserved ) { return false; } if ( !my_has_cached_item ) { flow_control control; fgt_begin_body( my_body ); my_cached_item = (*my_body)(control); my_has_cached_item = !control.is_pipeline_stopped; fgt_end_body( my_body ); } if ( my_has_cached_item ) { v = my_cached_item; my_reserved = true; return true; } else { return false; } } graph_task* create_put_task() { small_object_allocator allocator{}; typedef input_node_task_bypass< input_node > task_type; graph_task* t = allocator.new_object(my_graph, allocator, *this); my_graph.reserve_wait(); return t; } //! Spawns a task that applies the body void spawn_put( ) { if(is_graph_active(this->my_graph)) { spawn_in_graph_arena(this->my_graph, *create_put_task()); } } friend class input_node_task_bypass< input_node >; //! Applies the body. Returning SUCCESSFULLY_ENQUEUED okay; forward_task_bypass will handle it. graph_task* apply_body_bypass( ) { output_type v; if ( !try_reserve_apply_body(v) ) return NULL; graph_task *last_task = my_successors.try_put_task(v); if ( last_task ) try_consume(); else try_release(); return last_task; } }; // class input_node //! Implements a function node that supports Input -> Output template __TBB_requires(std::default_initializable && std::copy_constructible && std::copy_constructible) class function_node : public graph_node , public function_input< Input, Output, Policy, cache_aligned_allocator > , public function_output { typedef cache_aligned_allocator internals_allocator; public: typedef Input input_type; typedef Output output_type; typedef function_input input_impl_type; typedef function_input_queue input_queue_type; typedef function_output fOutput_type; typedef typename input_impl_type::predecessor_type predecessor_type; typedef typename fOutput_type::successor_type successor_type; using input_impl_type::my_predecessors; //! 
Constructor // input_queue_type is allocated here, but destroyed in the function_input_base. // TODO: pass the graph_buffer_policy to the function_input_base so it can all // be done in one place. This would be an interface-breaking change. template< typename Body > __TBB_requires(function_node_body) __TBB_NOINLINE_SYM function_node( graph &g, size_t concurrency, Body body, Policy = Policy(), node_priority_t a_priority = no_priority ) : graph_node(g), input_impl_type(g, concurrency, body, a_priority), fOutput_type(g) { fgt_node_with_body( CODEPTR(), FLOW_FUNCTION_NODE, &this->my_graph, static_cast *>(this), static_cast *>(this), this->my_body ); } template __TBB_requires(function_node_body) function_node( graph& g, size_t concurrency, Body body, node_priority_t a_priority ) : function_node(g, concurrency, body, Policy(), a_priority) {} #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template __TBB_requires(function_node_body) function_node( const node_set& nodes, size_t concurrency, Body body, Policy p = Policy(), node_priority_t a_priority = no_priority ) : function_node(nodes.graph_reference(), concurrency, body, p, a_priority) { make_edges_in_order(nodes, *this); } template __TBB_requires(function_node_body) function_node( const node_set& nodes, size_t concurrency, Body body, node_priority_t a_priority ) : function_node(nodes, concurrency, body, Policy(), a_priority) {} #endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET //! 
Copy constructor __TBB_NOINLINE_SYM function_node( const function_node& src ) : graph_node(src.my_graph), input_impl_type(src), fOutput_type(src.my_graph) { fgt_node_with_body( CODEPTR(), FLOW_FUNCTION_NODE, &this->my_graph, static_cast *>(this), static_cast *>(this), this->my_body ); } protected: template< typename R, typename B > friend class run_and_put_task; template friend class broadcast_cache; template friend class round_robin_cache; using input_impl_type::try_put_task; broadcast_cache &successors () override { return fOutput_type::my_successors; } void reset_node(reset_flags f) override { input_impl_type::reset_function_input(f); // TODO: use clear() instead. if(f & rf_clear_edges) { successors().clear(); my_predecessors.clear(); } __TBB_ASSERT(!(f & rf_clear_edges) || successors().empty(), "function_node successors not empty"); __TBB_ASSERT(this->my_predecessors.empty(), "function_node predecessors not empty"); } }; // class function_node //! implements a function node that supports Input -> (set of outputs) // Output is a tuple of output types. 
template __TBB_requires(std::default_initializable && std::copy_constructible) class multifunction_node : public graph_node, public multifunction_input < Input, typename wrap_tuple_elements< std::tuple_size::value, // #elements in tuple multifunction_output, // wrap this around each element Output // the tuple providing the types >::type, Policy, cache_aligned_allocator > { typedef cache_aligned_allocator internals_allocator; protected: static const int N = std::tuple_size::value; public: typedef Input input_type; typedef null_type output_type; typedef typename wrap_tuple_elements::type output_ports_type; typedef multifunction_input< input_type, output_ports_type, Policy, internals_allocator> input_impl_type; typedef function_input_queue input_queue_type; private: using input_impl_type::my_predecessors; public: template __TBB_requires(multifunction_node_body) __TBB_NOINLINE_SYM multifunction_node( graph &g, size_t concurrency, Body body, Policy = Policy(), node_priority_t a_priority = no_priority ) : graph_node(g), input_impl_type(g, concurrency, body, a_priority) { fgt_multioutput_node_with_body( CODEPTR(), FLOW_MULTIFUNCTION_NODE, &this->my_graph, static_cast *>(this), this->output_ports(), this->my_body ); } template __TBB_requires(multifunction_node_body) __TBB_NOINLINE_SYM multifunction_node(graph& g, size_t concurrency, Body body, node_priority_t a_priority) : multifunction_node(g, concurrency, body, Policy(), a_priority) {} #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template __TBB_requires(multifunction_node_body) __TBB_NOINLINE_SYM multifunction_node(const node_set& nodes, size_t concurrency, Body body, Policy p = Policy(), node_priority_t a_priority = no_priority) : multifunction_node(nodes.graph_reference(), concurrency, body, p, a_priority) { make_edges_in_order(nodes, *this); } template __TBB_requires(multifunction_node_body) __TBB_NOINLINE_SYM multifunction_node(const node_set& nodes, size_t concurrency, Body body, node_priority_t a_priority) : 
multifunction_node(nodes, concurrency, body, Policy(), a_priority) {} #endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET __TBB_NOINLINE_SYM multifunction_node( const multifunction_node &other) : graph_node(other.my_graph), input_impl_type(other) { fgt_multioutput_node_with_body( CODEPTR(), FLOW_MULTIFUNCTION_NODE, &this->my_graph, static_cast *>(this), this->output_ports(), this->my_body ); } // all the guts are in multifunction_input... protected: void reset_node(reset_flags f) override { input_impl_type::reset(f); } }; // multifunction_node //! split_node: accepts a tuple as input, forwards each element of the tuple to its // successors. The node has unlimited concurrency, so it does not reject inputs. template class split_node : public graph_node, public receiver { static const int N = std::tuple_size::value; typedef receiver base_type; public: typedef TupleType input_type; typedef typename wrap_tuple_elements< N, // #elements in tuple multifunction_output, // wrap this around each element TupleType // the tuple providing the types >::type output_ports_type; __TBB_NOINLINE_SYM explicit split_node(graph &g) : graph_node(g), my_output_ports(init_output_ports::call(g, my_output_ports)) { fgt_multioutput_node(CODEPTR(), FLOW_SPLIT_NODE, &this->my_graph, static_cast *>(this), this->output_ports()); } #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template __TBB_NOINLINE_SYM split_node(const node_set& nodes) : split_node(nodes.graph_reference()) { make_edges_in_order(nodes, *this); } #endif __TBB_NOINLINE_SYM split_node(const split_node& other) : graph_node(other.my_graph), base_type(other), my_output_ports(init_output_ports::call(other.my_graph, my_output_ports)) { fgt_multioutput_node(CODEPTR(), FLOW_SPLIT_NODE, &this->my_graph, static_cast *>(this), this->output_ports()); } output_ports_type &output_ports() { return my_output_ports; } protected: graph_task *try_put_task(const TupleType& t) override { // Sending split messages in parallel is not justified, as overheads would 
prevail. // Also, we do not have successors here. So we just tell the task returned here is successful. return emit_element::emit_this(this->my_graph, t, output_ports()); } void reset_node(reset_flags f) override { if (f & rf_clear_edges) clear_element::clear_this(my_output_ports); __TBB_ASSERT(!(f & rf_clear_edges) || clear_element::this_empty(my_output_ports), "split_node reset failed"); } graph& graph_reference() const override { return my_graph; } private: output_ports_type my_output_ports; }; //! Implements an executable node that supports continue_msg -> Output template > __TBB_requires(std::copy_constructible) class continue_node : public graph_node, public continue_input, public function_output { public: typedef continue_msg input_type; typedef Output output_type; typedef continue_input input_impl_type; typedef function_output fOutput_type; typedef typename input_impl_type::predecessor_type predecessor_type; typedef typename fOutput_type::successor_type successor_type; //! Constructor for executable node with continue_msg -> Output template __TBB_requires(continue_node_body) __TBB_NOINLINE_SYM continue_node( graph &g, Body body, Policy = Policy(), node_priority_t a_priority = no_priority ) : graph_node(g), input_impl_type( g, body, a_priority ), fOutput_type(g) { fgt_node_with_body( CODEPTR(), FLOW_CONTINUE_NODE, &this->my_graph, static_cast *>(this), static_cast *>(this), this->my_body ); } template __TBB_requires(continue_node_body) continue_node( graph& g, Body body, node_priority_t a_priority ) : continue_node(g, body, Policy(), a_priority) {} #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template __TBB_requires(continue_node_body) continue_node( const node_set& nodes, Body body, Policy p = Policy(), node_priority_t a_priority = no_priority ) : continue_node(nodes.graph_reference(), body, p, a_priority ) { make_edges_in_order(nodes, *this); } template __TBB_requires(continue_node_body) continue_node( const node_set& nodes, Body body, node_priority_t a_priority) 
: continue_node(nodes, body, Policy(), a_priority) {} #endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET //! Constructor for executable node with continue_msg -> Output template __TBB_requires(continue_node_body) __TBB_NOINLINE_SYM continue_node( graph &g, int number_of_predecessors, Body body, Policy = Policy(), node_priority_t a_priority = no_priority ) : graph_node(g) , input_impl_type(g, number_of_predecessors, body, a_priority), fOutput_type(g) { fgt_node_with_body( CODEPTR(), FLOW_CONTINUE_NODE, &this->my_graph, static_cast *>(this), static_cast *>(this), this->my_body ); } template __TBB_requires(continue_node_body) continue_node( graph& g, int number_of_predecessors, Body body, node_priority_t a_priority) : continue_node(g, number_of_predecessors, body, Policy(), a_priority) {} #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template __TBB_requires(continue_node_body) continue_node( const node_set& nodes, int number_of_predecessors, Body body, Policy p = Policy(), node_priority_t a_priority = no_priority ) : continue_node(nodes.graph_reference(), number_of_predecessors, body, p, a_priority) { make_edges_in_order(nodes, *this); } template __TBB_requires(continue_node_body) continue_node( const node_set& nodes, int number_of_predecessors, Body body, node_priority_t a_priority ) : continue_node(nodes, number_of_predecessors, body, Policy(), a_priority) {} #endif //! 
Copy constructor __TBB_NOINLINE_SYM continue_node( const continue_node& src ) : graph_node(src.my_graph), input_impl_type(src), function_output(src.my_graph) { fgt_node_with_body( CODEPTR(), FLOW_CONTINUE_NODE, &this->my_graph, static_cast *>(this), static_cast *>(this), this->my_body ); } protected: template< typename R, typename B > friend class run_and_put_task; template friend class broadcast_cache; template friend class round_robin_cache; using input_impl_type::try_put_task; broadcast_cache &successors () override { return fOutput_type::my_successors; } void reset_node(reset_flags f) override { input_impl_type::reset_receiver(f); if(f & rf_clear_edges)successors().clear(); __TBB_ASSERT(!(f & rf_clear_edges) || successors().empty(), "continue_node not reset"); } }; // continue_node //! Forwards messages of type T to all successors template class broadcast_node : public graph_node, public receiver, public sender { public: typedef T input_type; typedef T output_type; typedef typename receiver::predecessor_type predecessor_type; typedef typename sender::successor_type successor_type; private: broadcast_cache my_successors; public: __TBB_NOINLINE_SYM explicit broadcast_node(graph& g) : graph_node(g), my_successors(this) { fgt_node( CODEPTR(), FLOW_BROADCAST_NODE, &this->my_graph, static_cast *>(this), static_cast *>(this) ); } #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template broadcast_node(const node_set& nodes) : broadcast_node(nodes.graph_reference()) { make_edges_in_order(nodes, *this); } #endif // Copy constructor __TBB_NOINLINE_SYM broadcast_node( const broadcast_node& src ) : broadcast_node(src.my_graph) {} //! Adds a successor bool register_successor( successor_type &r ) override { my_successors.register_successor( r ); return true; } //! 
Removes s as a successor bool remove_successor( successor_type &r ) override { my_successors.remove_successor( r ); return true; } protected: template< typename R, typename B > friend class run_and_put_task; template friend class broadcast_cache; template friend class round_robin_cache; //! build a task to run the successor if possible. Default is old behavior. graph_task *try_put_task(const T& t) override { graph_task *new_task = my_successors.try_put_task(t); if (!new_task) new_task = SUCCESSFULLY_ENQUEUED; return new_task; } graph& graph_reference() const override { return my_graph; } void reset_node(reset_flags f) override { if (f&rf_clear_edges) { my_successors.clear(); } __TBB_ASSERT(!(f & rf_clear_edges) || my_successors.empty(), "Error resetting broadcast_node"); } }; // broadcast_node //! Forwards messages in arbitrary order template class buffer_node : public graph_node , public reservable_item_buffer< T, cache_aligned_allocator > , public receiver, public sender { typedef cache_aligned_allocator internals_allocator; public: typedef T input_type; typedef T output_type; typedef typename receiver::predecessor_type predecessor_type; typedef typename sender::successor_type successor_type; typedef buffer_node class_type; protected: typedef size_t size_type; round_robin_cache< T, null_rw_mutex > my_successors; friend class forward_task_bypass< class_type >; enum op_type {reg_succ, rem_succ, req_item, res_item, rel_res, con_res, put_item, try_fwd_task }; // implements the aggregator_operation concept class buffer_operation : public aggregated_operation< buffer_operation > { public: char type; T* elem; graph_task* ltask; successor_type *r; buffer_operation(const T& e, op_type t) : type(char(t)) , elem(const_cast(&e)) , ltask(NULL) {} buffer_operation(op_type t) : type(char(t)), ltask(NULL) {} }; bool forwarder_busy; typedef aggregating_functor handler_type; friend class aggregating_functor; aggregator< handler_type, buffer_operation> my_aggregator; virtual void 
handle_operations(buffer_operation *op_list) { handle_operations_impl(op_list, this); } template void handle_operations_impl(buffer_operation *op_list, derived_type* derived) { __TBB_ASSERT(static_cast(derived) == this, "'this' is not a base class for derived"); buffer_operation *tmp = NULL; bool try_forwarding = false; while (op_list) { tmp = op_list; op_list = op_list->next; switch (tmp->type) { case reg_succ: internal_reg_succ(tmp); try_forwarding = true; break; case rem_succ: internal_rem_succ(tmp); break; case req_item: internal_pop(tmp); break; case res_item: internal_reserve(tmp); break; case rel_res: internal_release(tmp); try_forwarding = true; break; case con_res: internal_consume(tmp); try_forwarding = true; break; case put_item: try_forwarding = internal_push(tmp); break; case try_fwd_task: internal_forward_task(tmp); break; } } derived->order(); if (try_forwarding && !forwarder_busy) { if(is_graph_active(this->my_graph)) { forwarder_busy = true; typedef forward_task_bypass task_type; small_object_allocator allocator{}; graph_task* new_task = allocator.new_object(graph_reference(), allocator, *this); my_graph.reserve_wait(); // tmp should point to the last item handled by the aggregator. This is the operation // the handling thread enqueued. So modifying that record will be okay. 
// TODO revamp: check that the issue is still present // workaround for icc bug (at least 12.0 and 13.0) // error: function "tbb::flow::interfaceX::combine_tasks" cannot be called with the given argument list // argument types are: (graph, graph_task *, graph_task *) graph_task *z = tmp->ltask; graph &g = this->my_graph; tmp->ltask = combine_tasks(g, z, new_task); // in case the op generated a task } } } // handle_operations inline graph_task *grab_forwarding_task( buffer_operation &op_data) { return op_data.ltask; } inline bool enqueue_forwarding_task(buffer_operation &op_data) { graph_task *ft = grab_forwarding_task(op_data); if(ft) { spawn_in_graph_arena(graph_reference(), *ft); return true; } return false; } //! This is executed by an enqueued task, the "forwarder" virtual graph_task *forward_task() { buffer_operation op_data(try_fwd_task); graph_task *last_task = NULL; do { op_data.status = WAIT; op_data.ltask = NULL; my_aggregator.execute(&op_data); // workaround for icc bug graph_task *xtask = op_data.ltask; graph& g = this->my_graph; last_task = combine_tasks(g, last_task, xtask); } while (op_data.status ==SUCCEEDED); return last_task; } //! Register successor virtual void internal_reg_succ(buffer_operation *op) { my_successors.register_successor(*(op->r)); op->status.store(SUCCEEDED, std::memory_order_release); } //! Remove successor virtual void internal_rem_succ(buffer_operation *op) { my_successors.remove_successor(*(op->r)); op->status.store(SUCCEEDED, std::memory_order_release); } private: void order() {} bool is_item_valid() { return this->my_item_valid(this->my_tail - 1); } void try_put_and_add_task(graph_task*& last_task) { graph_task *new_task = my_successors.try_put_task(this->back()); if (new_task) { // workaround for icc bug graph& g = this->my_graph; last_task = combine_tasks(g, last_task, new_task); this->destroy_back(); } } protected: //! 
Tries to forward valid items to successors virtual void internal_forward_task(buffer_operation *op) { internal_forward_task_impl(op, this); } template void internal_forward_task_impl(buffer_operation *op, derived_type* derived) { __TBB_ASSERT(static_cast(derived) == this, "'this' is not a base class for derived"); if (this->my_reserved || !derived->is_item_valid()) { op->status.store(FAILED, std::memory_order_release); this->forwarder_busy = false; return; } // Try forwarding, giving each successor a chance graph_task* last_task = NULL; size_type counter = my_successors.size(); for (; counter > 0 && derived->is_item_valid(); --counter) derived->try_put_and_add_task(last_task); op->ltask = last_task; // return task if (last_task && !counter) { op->status.store(SUCCEEDED, std::memory_order_release); } else { op->status.store(FAILED, std::memory_order_release); forwarder_busy = false; } } virtual bool internal_push(buffer_operation *op) { this->push_back(*(op->elem)); op->status.store(SUCCEEDED, std::memory_order_release); return true; } virtual void internal_pop(buffer_operation *op) { if(this->pop_back(*(op->elem))) { op->status.store(SUCCEEDED, std::memory_order_release); } else { op->status.store(FAILED, std::memory_order_release); } } virtual void internal_reserve(buffer_operation *op) { if(this->reserve_front(*(op->elem))) { op->status.store(SUCCEEDED, std::memory_order_release); } else { op->status.store(FAILED, std::memory_order_release); } } virtual void internal_consume(buffer_operation *op) { this->consume_front(); op->status.store(SUCCEEDED, std::memory_order_release); } virtual void internal_release(buffer_operation *op) { this->release_front(); op->status.store(SUCCEEDED, std::memory_order_release); } public: //! 
Constructor __TBB_NOINLINE_SYM explicit buffer_node( graph &g ) : graph_node(g), reservable_item_buffer(), receiver(), sender(), my_successors(this), forwarder_busy(false) { my_aggregator.initialize_handler(handler_type(this)); fgt_node( CODEPTR(), FLOW_BUFFER_NODE, &this->my_graph, static_cast *>(this), static_cast *>(this) ); } #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template buffer_node(const node_set& nodes) : buffer_node(nodes.graph_reference()) { make_edges_in_order(nodes, *this); } #endif //! Copy constructor __TBB_NOINLINE_SYM buffer_node( const buffer_node& src ) : buffer_node(src.my_graph) {} // // message sender implementation // //! Adds a new successor. /** Adds successor r to the list of successors; may forward tasks. */ bool register_successor( successor_type &r ) override { buffer_operation op_data(reg_succ); op_data.r = &r; my_aggregator.execute(&op_data); (void)enqueue_forwarding_task(op_data); return true; } //! Removes a successor. /** Removes successor r from the list of successors. It also calls r.remove_predecessor(*this) to remove this node as a predecessor. */ bool remove_successor( successor_type &r ) override { // TODO revamp: investigate why full qualification is necessary here tbb::detail::d1::remove_predecessor(r, *this); buffer_operation op_data(rem_succ); op_data.r = &r; my_aggregator.execute(&op_data); // even though this operation does not cause a forward, if we are the handler, and // a forward is scheduled, we may be the first to reach this point after the aggregator, // and so should check for the task. (void)enqueue_forwarding_task(op_data); return true; } //! Request an item from the buffer_node /** true = v contains the returned item
false = no item has been returned */ bool try_get( T &v ) override { buffer_operation op_data(req_item); op_data.elem = &v; my_aggregator.execute(&op_data); (void)enqueue_forwarding_task(op_data); return (op_data.status==SUCCEEDED); } //! Reserves an item. /** false = no item can be reserved
true = an item is reserved */ bool try_reserve( T &v ) override { buffer_operation op_data(res_item); op_data.elem = &v; my_aggregator.execute(&op_data); (void)enqueue_forwarding_task(op_data); return (op_data.status==SUCCEEDED); } //! Release a reserved item. /** true = item has been released and so remains in sender */ bool try_release() override { buffer_operation op_data(rel_res); my_aggregator.execute(&op_data); (void)enqueue_forwarding_task(op_data); return true; } //! Consumes a reserved item. /** true = item is removed from sender and reservation removed */ bool try_consume() override { buffer_operation op_data(con_res); my_aggregator.execute(&op_data); (void)enqueue_forwarding_task(op_data); return true; } protected: template< typename R, typename B > friend class run_and_put_task; template friend class broadcast_cache; template friend class round_robin_cache; //! receive an item, return a task *if possible graph_task *try_put_task(const T &t) override { buffer_operation op_data(t, put_item); my_aggregator.execute(&op_data); graph_task *ft = grab_forwarding_task(op_data); // sequencer_nodes can return failure (if an item has been previously inserted) // We have to spawn the returned task if our own operation fails. if(ft && op_data.status ==FAILED) { // we haven't succeeded queueing the item, but for some reason the // call returned a task (if another request resulted in a successful // forward this could happen.) Queue the task and reset the pointer. spawn_in_graph_arena(graph_reference(), *ft); ft = NULL; } else if(!ft && op_data.status ==SUCCEEDED) { ft = SUCCESSFULLY_ENQUEUED; } return ft; } graph& graph_reference() const override { return my_graph; } protected: void reset_node( reset_flags f) override { reservable_item_buffer::reset(); // TODO: just clear structures if (f&rf_clear_edges) { my_successors.clear(); } forwarder_busy = false; } }; // buffer_node //! 
Forwards messages in FIFO order template class queue_node : public buffer_node { protected: typedef buffer_node base_type; typedef typename base_type::size_type size_type; typedef typename base_type::buffer_operation queue_operation; typedef queue_node class_type; private: template friend class buffer_node; bool is_item_valid() { return this->my_item_valid(this->my_head); } void try_put_and_add_task(graph_task*& last_task) { graph_task *new_task = this->my_successors.try_put_task(this->front()); if (new_task) { // workaround for icc bug graph& graph_ref = this->graph_reference(); last_task = combine_tasks(graph_ref, last_task, new_task); this->destroy_front(); } } protected: void internal_forward_task(queue_operation *op) override { this->internal_forward_task_impl(op, this); } void internal_pop(queue_operation *op) override { if ( this->my_reserved || !this->my_item_valid(this->my_head)){ op->status.store(FAILED, std::memory_order_release); } else { this->pop_front(*(op->elem)); op->status.store(SUCCEEDED, std::memory_order_release); } } void internal_reserve(queue_operation *op) override { if (this->my_reserved || !this->my_item_valid(this->my_head)) { op->status.store(FAILED, std::memory_order_release); } else { this->reserve_front(*(op->elem)); op->status.store(SUCCEEDED, std::memory_order_release); } } void internal_consume(queue_operation *op) override { this->consume_front(); op->status.store(SUCCEEDED, std::memory_order_release); } public: typedef T input_type; typedef T output_type; typedef typename receiver::predecessor_type predecessor_type; typedef typename sender::successor_type successor_type; //! Constructor __TBB_NOINLINE_SYM explicit queue_node( graph &g ) : base_type(g) { fgt_node( CODEPTR(), FLOW_QUEUE_NODE, &(this->my_graph), static_cast *>(this), static_cast *>(this) ); } #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template queue_node( const node_set& nodes) : queue_node(nodes.graph_reference()) { make_edges_in_order(nodes, *this); } #endif //! 
Copy constructor __TBB_NOINLINE_SYM queue_node( const queue_node& src) : base_type(src) { fgt_node( CODEPTR(), FLOW_QUEUE_NODE, &(this->my_graph), static_cast *>(this), static_cast *>(this) ); } protected: void reset_node( reset_flags f) override { base_type::reset_node(f); } }; // queue_node //! Forwards messages in sequence order template __TBB_requires(std::copyable) class sequencer_node : public queue_node { function_body< T, size_t > *my_sequencer; // my_sequencer should be a benign function and must be callable // from a parallel context. Does this mean it needn't be reset? public: typedef T input_type; typedef T output_type; typedef typename receiver::predecessor_type predecessor_type; typedef typename sender::successor_type successor_type; //! Constructor template< typename Sequencer > __TBB_requires(sequencer) __TBB_NOINLINE_SYM sequencer_node( graph &g, const Sequencer& s ) : queue_node(g), my_sequencer(new function_body_leaf< T, size_t, Sequencer>(s) ) { fgt_node( CODEPTR(), FLOW_SEQUENCER_NODE, &(this->my_graph), static_cast *>(this), static_cast *>(this) ); } #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template __TBB_requires(sequencer) sequencer_node( const node_set& nodes, const Sequencer& s) : sequencer_node(nodes.graph_reference(), s) { make_edges_in_order(nodes, *this); } #endif //! Copy constructor __TBB_NOINLINE_SYM sequencer_node( const sequencer_node& src ) : queue_node(src), my_sequencer( src.my_sequencer->clone() ) { fgt_node( CODEPTR(), FLOW_SEQUENCER_NODE, &(this->my_graph), static_cast *>(this), static_cast *>(this) ); } //! 
Destructor ~sequencer_node() { delete my_sequencer; } protected: typedef typename buffer_node::size_type size_type; typedef typename buffer_node::buffer_operation sequencer_operation; private: bool internal_push(sequencer_operation *op) override { size_type tag = (*my_sequencer)(*(op->elem)); #if !TBB_DEPRECATED_SEQUENCER_DUPLICATES if (tag < this->my_head) { // have already emitted a message with this tag op->status.store(FAILED, std::memory_order_release); return false; } #endif // cannot modify this->my_tail now; the buffer would be inconsistent. size_t new_tail = (tag+1 > this->my_tail) ? tag+1 : this->my_tail; if (this->size(new_tail) > this->capacity()) { this->grow_my_array(this->size(new_tail)); } this->my_tail = new_tail; const op_stat res = this->place_item(tag, *(op->elem)) ? SUCCEEDED : FAILED; op->status.store(res, std::memory_order_release); return res ==SUCCEEDED; } }; // sequencer_node //! Forwards messages in priority order template> class priority_queue_node : public buffer_node { public: typedef T input_type; typedef T output_type; typedef buffer_node base_type; typedef priority_queue_node class_type; typedef typename receiver::predecessor_type predecessor_type; typedef typename sender::successor_type successor_type; //! Constructor __TBB_NOINLINE_SYM explicit priority_queue_node( graph &g, const Compare& comp = Compare() ) : buffer_node(g), compare(comp), mark(0) { fgt_node( CODEPTR(), FLOW_PRIORITY_QUEUE_NODE, &(this->my_graph), static_cast *>(this), static_cast *>(this) ); } #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template priority_queue_node(const node_set& nodes, const Compare& comp = Compare()) : priority_queue_node(nodes.graph_reference(), comp) { make_edges_in_order(nodes, *this); } #endif //! 
Copy constructor __TBB_NOINLINE_SYM priority_queue_node( const priority_queue_node &src ) : buffer_node(src), mark(0) { fgt_node( CODEPTR(), FLOW_PRIORITY_QUEUE_NODE, &(this->my_graph), static_cast *>(this), static_cast *>(this) ); } protected: void reset_node( reset_flags f) override { mark = 0; base_type::reset_node(f); } typedef typename buffer_node::size_type size_type; typedef typename buffer_node::item_type item_type; typedef typename buffer_node::buffer_operation prio_operation; //! Tries to forward valid items to successors void internal_forward_task(prio_operation *op) override { this->internal_forward_task_impl(op, this); } void handle_operations(prio_operation *op_list) override { this->handle_operations_impl(op_list, this); } bool internal_push(prio_operation *op) override { prio_push(*(op->elem)); op->status.store(SUCCEEDED, std::memory_order_release); return true; } void internal_pop(prio_operation *op) override { // if empty or already reserved, don't pop if ( this->my_reserved == true || this->my_tail == 0 ) { op->status.store(FAILED, std::memory_order_release); return; } *(op->elem) = prio(); op->status.store(SUCCEEDED, std::memory_order_release); prio_pop(); } // pops the highest-priority item, saves copy void internal_reserve(prio_operation *op) override { if (this->my_reserved == true || this->my_tail == 0) { op->status.store(FAILED, std::memory_order_release); return; } this->my_reserved = true; *(op->elem) = prio(); reserved_item = *(op->elem); op->status.store(SUCCEEDED, std::memory_order_release); prio_pop(); } void internal_consume(prio_operation *op) override { op->status.store(SUCCEEDED, std::memory_order_release); this->my_reserved = false; reserved_item = input_type(); } void internal_release(prio_operation *op) override { op->status.store(SUCCEEDED, std::memory_order_release); prio_push(reserved_item); this->my_reserved = false; reserved_item = input_type(); } private: template friend class buffer_node; void order() { if (mark < 
this->my_tail) heapify(); __TBB_ASSERT(mark == this->my_tail, "mark unequal after heapify"); } bool is_item_valid() { return this->my_tail > 0; } void try_put_and_add_task(graph_task*& last_task) { graph_task * new_task = this->my_successors.try_put_task(this->prio()); if (new_task) { // workaround for icc bug graph& graph_ref = this->graph_reference(); last_task = combine_tasks(graph_ref, last_task, new_task); prio_pop(); } } private: Compare compare; size_type mark; input_type reserved_item; // in case a reheap has not been done after a push, check if the mark item is higher than the 0'th item bool prio_use_tail() { __TBB_ASSERT(mark <= this->my_tail, "mark outside bounds before test"); return mark < this->my_tail && compare(this->get_my_item(0), this->get_my_item(this->my_tail - 1)); } // prio_push: checks that the item will fit, expand array if necessary, put at end void prio_push(const T &src) { if ( this->my_tail >= this->my_array_size ) this->grow_my_array( this->my_tail + 1 ); (void) this->place_item(this->my_tail, src); ++(this->my_tail); __TBB_ASSERT(mark < this->my_tail, "mark outside bounds after push"); } // prio_pop: deletes highest priority item from the array, and if it is item // 0, move last item to 0 and reheap. If end of array, just destroy and decrement tail // and mark. Assumes the array has already been tested for emptiness; no failure. 
void prio_pop() { if (prio_use_tail()) { // there are newly pushed elements; last one higher than top // copy the data this->destroy_item(this->my_tail-1); --(this->my_tail); __TBB_ASSERT(mark <= this->my_tail, "mark outside bounds after pop"); return; } this->destroy_item(0); if(this->my_tail > 1) { // push the last element down heap __TBB_ASSERT(this->my_item_valid(this->my_tail - 1), NULL); this->move_item(0,this->my_tail - 1); } --(this->my_tail); if(mark > this->my_tail) --mark; if (this->my_tail > 1) // don't reheap for heap of size 1 reheap(); __TBB_ASSERT(mark <= this->my_tail, "mark outside bounds after pop"); } const T& prio() { return this->get_my_item(prio_use_tail() ? this->my_tail-1 : 0); } // turn array into heap void heapify() { if(this->my_tail == 0) { mark = 0; return; } if (!mark) mark = 1; for (; markmy_tail; ++mark) { // for each unheaped element size_type cur_pos = mark; input_type to_place; this->fetch_item(mark,to_place); do { // push to_place up the heap size_type parent = (cur_pos-1)>>1; if (!compare(this->get_my_item(parent), to_place)) break; this->move_item(cur_pos, parent); cur_pos = parent; } while( cur_pos ); (void) this->place_item(cur_pos, to_place); } } // otherwise heapified array with new root element; rearrange to heap void reheap() { size_type cur_pos=0, child=1; while (child < mark) { size_type target = child; if (child+1get_my_item(child), this->get_my_item(child+1))) ++target; // target now has the higher priority child if (compare(this->get_my_item(target), this->get_my_item(cur_pos))) break; // swap this->swap_items(cur_pos, target); cur_pos = target; child = (cur_pos<<1)+1; } } }; // priority_queue_node //! Forwards messages only if the threshold has not been reached /** This node forwards items until its threshold is reached. It contains no buffering. If the downstream node rejects, the message is dropped. 
*/ template< typename T, typename DecrementType=continue_msg > class limiter_node : public graph_node, public receiver< T >, public sender< T > { public: typedef T input_type; typedef T output_type; typedef typename receiver::predecessor_type predecessor_type; typedef typename sender::successor_type successor_type; //TODO: There is a lack of predefined types for its controlling "decrementer" port. It should be fixed later. private: size_t my_threshold; size_t my_count; // number of successful puts size_t my_tries; // number of active put attempts size_t my_future_decrement; // number of active decrement reservable_predecessor_cache< T, spin_mutex > my_predecessors; spin_mutex my_mutex; broadcast_cache< T > my_successors; //! The internal receiver< DecrementType > that adjusts the count threshold_regulator< limiter_node, DecrementType > decrement; graph_task* decrement_counter( long long delta ) { if ( delta > 0 && size_t(delta) > my_threshold ) { delta = my_threshold; } { spin_mutex::scoped_lock lock(my_mutex); if ( delta > 0 && size_t(delta) > my_count ) { if( my_tries > 0 ) { my_future_decrement += (size_t(delta) - my_count); } my_count = 0; } else if ( delta < 0 && size_t(-delta) > my_threshold - my_count ) { my_count = my_threshold; } else { my_count -= size_t(delta); // absolute value of delta is sufficiently small } __TBB_ASSERT(my_count <= my_threshold, "counter values are truncated to be inside the [0, threshold] interval"); } return forward_task(); } // Let threshold_regulator call decrement_counter() friend class threshold_regulator< limiter_node, DecrementType >; friend class forward_task_bypass< limiter_node >; bool check_conditions() { // always called under lock return ( my_count + my_tries < my_threshold && !my_predecessors.empty() && !my_successors.empty() ); } // only returns a valid task pointer or NULL, never SUCCESSFULLY_ENQUEUED graph_task* forward_task() { input_type v; graph_task* rval = NULL; bool reserved = false; { spin_mutex::scoped_lock 
lock(my_mutex); if ( check_conditions() ) ++my_tries; else return NULL; } //SUCCESS // if we can reserve and can put, we consume the reservation // we increment the count and decrement the tries if ( (my_predecessors.try_reserve(v)) == true ) { reserved = true; if ( (rval = my_successors.try_put_task(v)) != NULL ) { { spin_mutex::scoped_lock lock(my_mutex); ++my_count; if ( my_future_decrement ) { if ( my_count > my_future_decrement ) { my_count -= my_future_decrement; my_future_decrement = 0; } else { my_future_decrement -= my_count; my_count = 0; } } --my_tries; my_predecessors.try_consume(); if ( check_conditions() ) { if ( is_graph_active(this->my_graph) ) { typedef forward_task_bypass> task_type; small_object_allocator allocator{}; graph_task* rtask = allocator.new_object( my_graph, allocator, *this ); my_graph.reserve_wait(); spawn_in_graph_arena(graph_reference(), *rtask); } } } return rval; } } //FAILURE //if we can't reserve, we decrement the tries //if we can reserve but can't put, we decrement the tries and release the reservation { spin_mutex::scoped_lock lock(my_mutex); --my_tries; if (reserved) my_predecessors.try_release(); if ( check_conditions() ) { if ( is_graph_active(this->my_graph) ) { small_object_allocator allocator{}; typedef forward_task_bypass> task_type; graph_task* t = allocator.new_object(my_graph, allocator, *this); my_graph.reserve_wait(); __TBB_ASSERT(!rval, "Have two tasks to handle"); return t; } } return rval; } } void initialize() { fgt_node( CODEPTR(), FLOW_LIMITER_NODE, &this->my_graph, static_cast *>(this), static_cast *>(&decrement), static_cast *>(this) ); } public: //! 
Constructor limiter_node(graph &g, size_t threshold) : graph_node(g), my_threshold(threshold), my_count(0), my_tries(0), my_future_decrement(0), my_predecessors(this), my_successors(this), decrement(this) { initialize(); } #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template limiter_node(const node_set& nodes, size_t threshold) : limiter_node(nodes.graph_reference(), threshold) { make_edges_in_order(nodes, *this); } #endif //! Copy constructor limiter_node( const limiter_node& src ) : limiter_node(src.my_graph, src.my_threshold) {} //! The interface for accessing internal receiver< DecrementType > that adjusts the count receiver& decrementer() { return decrement; } //! Replace the current successor with this new successor bool register_successor( successor_type &r ) override { spin_mutex::scoped_lock lock(my_mutex); bool was_empty = my_successors.empty(); my_successors.register_successor(r); //spawn a forward task if this is the only successor if ( was_empty && !my_predecessors.empty() && my_count + my_tries < my_threshold ) { if ( is_graph_active(this->my_graph) ) { small_object_allocator allocator{}; typedef forward_task_bypass> task_type; graph_task* t = allocator.new_object(my_graph, allocator, *this); my_graph.reserve_wait(); spawn_in_graph_arena(graph_reference(), *t); } } return true; } //! Removes a successor from this node /** r.remove_predecessor(*this) is also called. */ bool remove_successor( successor_type &r ) override { // TODO revamp: investigate why qualification is needed for remove_predecessor() call tbb::detail::d1::remove_predecessor(r, *this); my_successors.remove_successor(r); return true; } //! Adds src to the list of cached predecessors. 
bool register_predecessor( predecessor_type &src ) override { spin_mutex::scoped_lock lock(my_mutex); my_predecessors.add( src ); if ( my_count + my_tries < my_threshold && !my_successors.empty() && is_graph_active(this->my_graph) ) { small_object_allocator allocator{}; typedef forward_task_bypass> task_type; graph_task* t = allocator.new_object(my_graph, allocator, *this); my_graph.reserve_wait(); spawn_in_graph_arena(graph_reference(), *t); } return true; } //! Removes src from the list of cached predecessors. bool remove_predecessor( predecessor_type &src ) override { my_predecessors.remove( src ); return true; } protected: template< typename R, typename B > friend class run_and_put_task; template friend class broadcast_cache; template friend class round_robin_cache; //! Puts an item to this receiver graph_task* try_put_task( const T &t ) override { { spin_mutex::scoped_lock lock(my_mutex); if ( my_count + my_tries >= my_threshold ) return NULL; else ++my_tries; } graph_task* rtask = my_successors.try_put_task(t); if ( !rtask ) { // try_put_task failed. 
spin_mutex::scoped_lock lock(my_mutex); --my_tries; if (check_conditions() && is_graph_active(this->my_graph)) { small_object_allocator allocator{}; typedef forward_task_bypass> task_type; rtask = allocator.new_object(my_graph, allocator, *this); my_graph.reserve_wait(); } } else { spin_mutex::scoped_lock lock(my_mutex); ++my_count; if ( my_future_decrement ) { if ( my_count > my_future_decrement ) { my_count -= my_future_decrement; my_future_decrement = 0; } else { my_future_decrement -= my_count; my_count = 0; } } --my_tries; } return rtask; } graph& graph_reference() const override { return my_graph; } void reset_node( reset_flags f ) override { my_count = 0; if ( f & rf_clear_edges ) { my_predecessors.clear(); my_successors.clear(); } else { my_predecessors.reset(); } decrement.reset_receiver(f); } }; // limiter_node #include "detail/_flow_graph_join_impl.h" template class join_node; template class join_node: public unfolded_join_node::value, reserving_port, OutputTuple, reserving> { private: static const int N = std::tuple_size::value; typedef unfolded_join_node unfolded_type; public: typedef OutputTuple output_type; typedef typename unfolded_type::input_ports_type input_ports_type; __TBB_NOINLINE_SYM explicit join_node(graph &g) : unfolded_type(g) { fgt_multiinput_node( CODEPTR(), FLOW_JOIN_NODE_RESERVING, &this->my_graph, this->input_ports(), static_cast< sender< output_type > *>(this) ); } #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template __TBB_NOINLINE_SYM join_node(const node_set& nodes, reserving = reserving()) : join_node(nodes.graph_reference()) { make_edges_in_order(nodes, *this); } #endif __TBB_NOINLINE_SYM join_node(const join_node &other) : unfolded_type(other) { fgt_multiinput_node( CODEPTR(), FLOW_JOIN_NODE_RESERVING, &this->my_graph, this->input_ports(), static_cast< sender< output_type > *>(this) ); } }; template class join_node: public unfolded_join_node::value, queueing_port, OutputTuple, queueing> { private: static const int N = 
std::tuple_size::value; typedef unfolded_join_node unfolded_type; public: typedef OutputTuple output_type; typedef typename unfolded_type::input_ports_type input_ports_type; __TBB_NOINLINE_SYM explicit join_node(graph &g) : unfolded_type(g) { fgt_multiinput_node( CODEPTR(), FLOW_JOIN_NODE_QUEUEING, &this->my_graph, this->input_ports(), static_cast< sender< output_type > *>(this) ); } #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template __TBB_NOINLINE_SYM join_node(const node_set& nodes, queueing = queueing()) : join_node(nodes.graph_reference()) { make_edges_in_order(nodes, *this); } #endif __TBB_NOINLINE_SYM join_node(const join_node &other) : unfolded_type(other) { fgt_multiinput_node( CODEPTR(), FLOW_JOIN_NODE_QUEUEING, &this->my_graph, this->input_ports(), static_cast< sender< output_type > *>(this) ); } }; #if __TBB_CPP20_CONCEPTS_PRESENT // Helper function which is well-formed only if all of the elements in OutputTuple // satisfies join_node_function_object template void join_node_function_objects_helper( std::index_sequence ) requires (std::tuple_size_v == sizeof...(Functions)) && (... && join_node_function_object, K>); template concept join_node_functions = requires { join_node_function_objects_helper(std::make_index_sequence{}); }; #endif // template for key_matching join_node // tag_matching join_node is a specialization of key_matching, and is source-compatible. 
template class join_node > : public unfolded_join_node::value, key_matching_port, OutputTuple, key_matching > { private: static const int N = std::tuple_size::value; typedef unfolded_join_node > unfolded_type; public: typedef OutputTuple output_type; typedef typename unfolded_type::input_ports_type input_ports_type; #if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING join_node(graph &g) : unfolded_type(g) {} #endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ template __TBB_requires(join_node_functions) __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1) : unfolded_type(g, b0, b1) { fgt_multiinput_node( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, this->input_ports(), static_cast< sender< output_type > *>(this) ); } template __TBB_requires(join_node_functions) __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2) : unfolded_type(g, b0, b1, b2) { fgt_multiinput_node( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, this->input_ports(), static_cast< sender< output_type > *>(this) ); } template __TBB_requires(join_node_functions) __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3) : unfolded_type(g, b0, b1, b2, b3) { fgt_multiinput_node( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, this->input_ports(), static_cast< sender< output_type > *>(this) ); } template __TBB_requires(join_node_functions) __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4) : unfolded_type(g, b0, b1, b2, b3, b4) { fgt_multiinput_node( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, this->input_ports(), static_cast< sender< output_type > *>(this) ); } #if __TBB_VARIADIC_MAX >= 6 template __TBB_requires(join_node_functions) __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5) : unfolded_type(g, b0, b1, b2, b3, b4, b5) { fgt_multiinput_node( CODEPTR(), 
FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, this->input_ports(), static_cast< sender< output_type > *>(this) ); } #endif #if __TBB_VARIADIC_MAX >= 7 template __TBB_requires(join_node_functions) __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5, __TBB_B6 b6) : unfolded_type(g, b0, b1, b2, b3, b4, b5, b6) { fgt_multiinput_node( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, this->input_ports(), static_cast< sender< output_type > *>(this) ); } #endif #if __TBB_VARIADIC_MAX >= 8 template __TBB_requires(join_node_functions) __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5, __TBB_B6 b6, __TBB_B7 b7) : unfolded_type(g, b0, b1, b2, b3, b4, b5, b6, b7) { fgt_multiinput_node( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, this->input_ports(), static_cast< sender< output_type > *>(this) ); } #endif #if __TBB_VARIADIC_MAX >= 9 template __TBB_requires(join_node_functions) __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5, __TBB_B6 b6, __TBB_B7 b7, __TBB_B8 b8) : unfolded_type(g, b0, b1, b2, b3, b4, b5, b6, b7, b8) { fgt_multiinput_node( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, this->input_ports(), static_cast< sender< output_type > *>(this) ); } #endif #if __TBB_VARIADIC_MAX >= 10 template __TBB_requires(join_node_functions) __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5, __TBB_B6 b6, __TBB_B7 b7, __TBB_B8 b8, __TBB_B9 b9) : unfolded_type(g, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9) { fgt_multiinput_node( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, this->input_ports(), static_cast< sender< output_type > *>(this) ); } #endif #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template < #if (__clang_major__ == 3 && __clang_minor__ == 4) // clang 3.4 misdeduces 'Args...' 
for 'node_set' while it can cope with template template parameter. template class node_set, #endif typename... Args, typename... Bodies > __TBB_requires((sizeof...(Bodies) == 0) || join_node_functions) __TBB_NOINLINE_SYM join_node(const node_set& nodes, Bodies... bodies) : join_node(nodes.graph_reference(), bodies...) { make_edges_in_order(nodes, *this); } #endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET __TBB_NOINLINE_SYM join_node(const join_node &other) : unfolded_type(other) { fgt_multiinput_node( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, this->input_ports(), static_cast< sender< output_type > *>(this) ); } }; // indexer node #include "detail/_flow_graph_indexer_impl.h" // TODO: Implement interface with variadic template or tuple template class indexer_node; //indexer node specializations template class indexer_node : public unfolded_indexer_node > { private: static const int N = 1; public: typedef std::tuple InputTuple; typedef tagged_msg output_type; typedef unfolded_indexer_node unfolded_type; __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { fgt_multiinput_node( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, this->input_ports(), static_cast< sender< output_type > *>(this) ); } #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template indexer_node(const node_set& nodes) : indexer_node(nodes.graph_reference()) { make_edges_in_order(nodes, *this); } #endif // Copy constructor __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { fgt_multiinput_node( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, this->input_ports(), static_cast< sender< output_type > *>(this) ); } }; template class indexer_node : public unfolded_indexer_node > { private: static const int N = 2; public: typedef std::tuple InputTuple; typedef tagged_msg output_type; typedef unfolded_indexer_node unfolded_type; __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { fgt_multiinput_node( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, 
this->input_ports(), static_cast< sender< output_type > *>(this) ); } #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template indexer_node(const node_set& nodes) : indexer_node(nodes.graph_reference()) { make_edges_in_order(nodes, *this); } #endif // Copy constructor __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { fgt_multiinput_node( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, this->input_ports(), static_cast< sender< output_type > *>(this) ); } }; template class indexer_node : public unfolded_indexer_node > { private: static const int N = 3; public: typedef std::tuple InputTuple; typedef tagged_msg output_type; typedef unfolded_indexer_node unfolded_type; __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { fgt_multiinput_node( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, this->input_ports(), static_cast< sender< output_type > *>(this) ); } #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template indexer_node(const node_set& nodes) : indexer_node(nodes.graph_reference()) { make_edges_in_order(nodes, *this); } #endif // Copy constructor __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { fgt_multiinput_node( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, this->input_ports(), static_cast< sender< output_type > *>(this) ); } }; template class indexer_node : public unfolded_indexer_node > { private: static const int N = 4; public: typedef std::tuple InputTuple; typedef tagged_msg output_type; typedef unfolded_indexer_node unfolded_type; __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { fgt_multiinput_node( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, this->input_ports(), static_cast< sender< output_type > *>(this) ); } #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template indexer_node(const node_set& nodes) : indexer_node(nodes.graph_reference()) { make_edges_in_order(nodes, *this); } #endif // Copy constructor __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : 
unfolded_type(other) { fgt_multiinput_node( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, this->input_ports(), static_cast< sender< output_type > *>(this) ); } }; template class indexer_node : public unfolded_indexer_node > { private: static const int N = 5; public: typedef std::tuple InputTuple; typedef tagged_msg output_type; typedef unfolded_indexer_node unfolded_type; __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { fgt_multiinput_node( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, this->input_ports(), static_cast< sender< output_type > *>(this) ); } #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template indexer_node(const node_set& nodes) : indexer_node(nodes.graph_reference()) { make_edges_in_order(nodes, *this); } #endif // Copy constructor __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { fgt_multiinput_node( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, this->input_ports(), static_cast< sender< output_type > *>(this) ); } }; #if __TBB_VARIADIC_MAX >= 6 template class indexer_node : public unfolded_indexer_node > { private: static const int N = 6; public: typedef std::tuple InputTuple; typedef tagged_msg output_type; typedef unfolded_indexer_node unfolded_type; __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { fgt_multiinput_node( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, this->input_ports(), static_cast< sender< output_type > *>(this) ); } #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template indexer_node(const node_set& nodes) : indexer_node(nodes.graph_reference()) { make_edges_in_order(nodes, *this); } #endif // Copy constructor __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { fgt_multiinput_node( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, this->input_ports(), static_cast< sender< output_type > *>(this) ); } }; #endif //variadic max 6 #if __TBB_VARIADIC_MAX >= 7 template class indexer_node : public unfolded_indexer_node > { private: static const int N = 7; 
public: typedef std::tuple InputTuple; typedef tagged_msg output_type; typedef unfolded_indexer_node unfolded_type; __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { fgt_multiinput_node( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, this->input_ports(), static_cast< sender< output_type > *>(this) ); } #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template indexer_node(const node_set& nodes) : indexer_node(nodes.graph_reference()) { make_edges_in_order(nodes, *this); } #endif // Copy constructor __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { fgt_multiinput_node( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, this->input_ports(), static_cast< sender< output_type > *>(this) ); } }; #endif //variadic max 7 #if __TBB_VARIADIC_MAX >= 8 template class indexer_node : public unfolded_indexer_node > { private: static const int N = 8; public: typedef std::tuple InputTuple; typedef tagged_msg output_type; typedef unfolded_indexer_node unfolded_type; indexer_node(graph& g) : unfolded_type(g) { fgt_multiinput_node( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, this->input_ports(), static_cast< sender< output_type > *>(this) ); } #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template indexer_node(const node_set& nodes) : indexer_node(nodes.graph_reference()) { make_edges_in_order(nodes, *this); } #endif // Copy constructor indexer_node( const indexer_node& other ) : unfolded_type(other) { fgt_multiinput_node( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, this->input_ports(), static_cast< sender< output_type > *>(this) ); } }; #endif //variadic max 8 #if __TBB_VARIADIC_MAX >= 9 template class indexer_node : public unfolded_indexer_node > { private: static const int N = 9; public: typedef std::tuple InputTuple; typedef tagged_msg output_type; typedef unfolded_indexer_node unfolded_type; __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { fgt_multiinput_node( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, this->input_ports(), 
static_cast< sender< output_type > *>(this) ); } #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template indexer_node(const node_set& nodes) : indexer_node(nodes.graph_reference()) { make_edges_in_order(nodes, *this); } #endif // Copy constructor __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { fgt_multiinput_node( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, this->input_ports(), static_cast< sender< output_type > *>(this) ); } }; #endif //variadic max 9 #if __TBB_VARIADIC_MAX >= 10 template class indexer_node/*default*/ : public unfolded_indexer_node > { private: static const int N = 10; public: typedef std::tuple InputTuple; typedef tagged_msg output_type; typedef unfolded_indexer_node unfolded_type; __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { fgt_multiinput_node( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, this->input_ports(), static_cast< sender< output_type > *>(this) ); } #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template indexer_node(const node_set& nodes) : indexer_node(nodes.graph_reference()) { make_edges_in_order(nodes, *this); } #endif // Copy constructor __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { fgt_multiinput_node( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, this->input_ports(), static_cast< sender< output_type > *>(this) ); } }; #endif //variadic max 10 template< typename T > inline void internal_make_edge( sender &p, receiver &s ) { register_successor(p, s); fgt_make_edge( &p, &s ); } //! Makes an edge between a single predecessor and a single successor template< typename T > inline void make_edge( sender &p, receiver &s ) { internal_make_edge( p, s ); } //Makes an edge from port 0 of a multi-output predecessor to port 0 of a multi-input successor. 
template< typename T, typename V, typename = typename T::output_ports_type, typename = typename V::input_ports_type > inline void make_edge( T& output, V& input) { make_edge(std::get<0>(output.output_ports()), std::get<0>(input.input_ports())); } //Makes an edge from port 0 of a multi-output predecessor to a receiver. template< typename T, typename R, typename = typename T::output_ports_type > inline void make_edge( T& output, receiver& input) { make_edge(std::get<0>(output.output_ports()), input); } //Makes an edge from a sender to port 0 of a multi-input successor. template< typename S, typename V, typename = typename V::input_ports_type > inline void make_edge( sender& output, V& input) { make_edge(output, std::get<0>(input.input_ports())); } template< typename T > inline void internal_remove_edge( sender &p, receiver &s ) { remove_successor( p, s ); fgt_remove_edge( &p, &s ); } //! Removes an edge between a single predecessor and a single successor template< typename T > inline void remove_edge( sender &p, receiver &s ) { internal_remove_edge( p, s ); } //Removes an edge between port 0 of a multi-output predecessor and port 0 of a multi-input successor. template< typename T, typename V, typename = typename T::output_ports_type, typename = typename V::input_ports_type > inline void remove_edge( T& output, V& input) { remove_edge(std::get<0>(output.output_ports()), std::get<0>(input.input_ports())); } //Removes an edge between port 0 of a multi-output predecessor and a receiver. template< typename T, typename R, typename = typename T::output_ports_type > inline void remove_edge( T& output, receiver& input) { remove_edge(std::get<0>(output.output_ports()), input); } //Removes an edge between a sender and port 0 of a multi-input successor. template< typename S, typename V, typename = typename V::input_ports_type > inline void remove_edge( sender& output, V& input) { remove_edge(output, std::get<0>(input.input_ports())); } //! 
Returns a copy of the body from a function or continue node template< typename Body, typename Node > Body copy_body( Node &n ) { return n.template copy_function_object(); } //composite_node template< typename InputTuple, typename OutputTuple > class composite_node; template< typename... InputTypes, typename... OutputTypes> class composite_node , std::tuple > : public graph_node { public: typedef std::tuple< receiver&... > input_ports_type; typedef std::tuple< sender&... > output_ports_type; private: std::unique_ptr my_input_ports; std::unique_ptr my_output_ports; static const size_t NUM_INPUTS = sizeof...(InputTypes); static const size_t NUM_OUTPUTS = sizeof...(OutputTypes); protected: void reset_node(reset_flags) override {} public: composite_node( graph &g ) : graph_node(g) { fgt_multiinput_multioutput_node( CODEPTR(), FLOW_COMPOSITE_NODE, this, &this->my_graph ); } template void set_external_ports(T1&& input_ports_tuple, T2&& output_ports_tuple) { static_assert(NUM_INPUTS == std::tuple_size::value, "number of arguments does not match number of input ports"); static_assert(NUM_OUTPUTS == std::tuple_size::value, "number of arguments does not match number of output ports"); fgt_internal_input_alias_helper::alias_port( this, input_ports_tuple); fgt_internal_output_alias_helper::alias_port( this, output_ports_tuple); my_input_ports.reset( new input_ports_type(std::forward(input_ports_tuple)) ); my_output_ports.reset( new output_ports_type(std::forward(output_ports_tuple)) ); } template< typename... NodeTypes > void add_visible_nodes(const NodeTypes&... n) { add_nodes_impl(this, true, n...); } template< typename... NodeTypes > void add_nodes(const NodeTypes&... 
n) { add_nodes_impl(this, false, n...); } input_ports_type& input_ports() { __TBB_ASSERT(my_input_ports, "input ports not set, call set_external_ports to set input ports"); return *my_input_ports; } output_ports_type& output_ports() { __TBB_ASSERT(my_output_ports, "output ports not set, call set_external_ports to set output ports"); return *my_output_ports; } }; // class composite_node //composite_node with only input ports template< typename... InputTypes> class composite_node , std::tuple<> > : public graph_node { public: typedef std::tuple< receiver&... > input_ports_type; private: std::unique_ptr my_input_ports; static const size_t NUM_INPUTS = sizeof...(InputTypes); protected: void reset_node(reset_flags) override {} public: composite_node( graph &g ) : graph_node(g) { fgt_composite( CODEPTR(), this, &g ); } template void set_external_ports(T&& input_ports_tuple) { static_assert(NUM_INPUTS == std::tuple_size::value, "number of arguments does not match number of input ports"); fgt_internal_input_alias_helper::alias_port( this, input_ports_tuple); my_input_ports.reset( new input_ports_type(std::forward(input_ports_tuple)) ); } template< typename... NodeTypes > void add_visible_nodes(const NodeTypes&... n) { add_nodes_impl(this, true, n...); } template< typename... NodeTypes > void add_nodes( const NodeTypes&... n) { add_nodes_impl(this, false, n...); } input_ports_type& input_ports() { __TBB_ASSERT(my_input_ports, "input ports not set, call set_external_ports to set input ports"); return *my_input_ports; } }; // class composite_node //composite_nodes with only output_ports template class composite_node , std::tuple > : public graph_node { public: typedef std::tuple< sender&... 
> output_ports_type; private: std::unique_ptr my_output_ports; static const size_t NUM_OUTPUTS = sizeof...(OutputTypes); protected: void reset_node(reset_flags) override {} public: __TBB_NOINLINE_SYM composite_node( graph &g ) : graph_node(g) { fgt_composite( CODEPTR(), this, &g ); } template void set_external_ports(T&& output_ports_tuple) { static_assert(NUM_OUTPUTS == std::tuple_size::value, "number of arguments does not match number of output ports"); fgt_internal_output_alias_helper::alias_port( this, output_ports_tuple); my_output_ports.reset( new output_ports_type(std::forward(output_ports_tuple)) ); } template void add_visible_nodes(const NodeTypes&... n) { add_nodes_impl(this, true, n...); } template void add_nodes(const NodeTypes&... n) { add_nodes_impl(this, false, n...); } output_ports_type& output_ports() { __TBB_ASSERT(my_output_ports, "output ports not set, call set_external_ports to set output ports"); return *my_output_ports; } }; // class composite_node template class async_body_base: no_assign { public: typedef Gateway gateway_type; async_body_base(gateway_type *gateway): my_gateway(gateway) { } void set_gateway(gateway_type *gateway) { my_gateway = gateway; } protected: gateway_type *my_gateway; }; template class async_body: public async_body_base { private: Body my_body; public: typedef async_body_base base_type; typedef Gateway gateway_type; async_body(const Body &body, gateway_type *gateway) : base_type(gateway), my_body(body) { } void operator()( const Input &v, Ports & ) noexcept(noexcept(my_body(v, std::declval()))) { my_body(v, *this->my_gateway); } Body get_body() { return my_body; } }; //! 
Implements async node template < typename Input, typename Output, typename Policy = queueing_lightweight > __TBB_requires(std::default_initializable && std::copy_constructible) class async_node : public multifunction_node< Input, std::tuple< Output >, Policy >, public sender< Output > { typedef multifunction_node< Input, std::tuple< Output >, Policy > base_type; typedef multifunction_input< Input, typename base_type::output_ports_type, Policy, cache_aligned_allocator> mfn_input_type; public: typedef Input input_type; typedef Output output_type; typedef receiver receiver_type; typedef receiver successor_type; typedef sender predecessor_type; typedef receiver_gateway gateway_type; typedef async_body_base async_body_base_type; typedef typename base_type::output_ports_type output_ports_type; private: class receiver_gateway_impl: public receiver_gateway { public: receiver_gateway_impl(async_node* node): my_node(node) {} void reserve_wait() override { fgt_async_reserve(static_cast(my_node), &my_node->my_graph); my_node->my_graph.reserve_wait(); } void release_wait() override { async_node* n = my_node; graph* g = &n->my_graph; g->release_wait(); fgt_async_commit(static_cast(n), g); } //! Implements gateway_type::try_put for an external activity to submit a message to FG bool try_put(const Output &i) override { return my_node->try_put_impl(i); } private: async_node* my_node; } my_gateway; //The substitute of 'this' for member construction, to prevent compiler warnings async_node* self() { return this; } //! 
Implements gateway_type::try_put for an external activity to submit a message to FG bool try_put_impl(const Output &i) { multifunction_output &port_0 = output_port<0>(*this); broadcast_cache& port_successors = port_0.successors(); fgt_async_try_put_begin(this, &port_0); // TODO revamp: change to std::list graph_task_list tasks; bool is_at_least_one_put_successful = port_successors.gather_successful_try_puts(i, tasks); __TBB_ASSERT( is_at_least_one_put_successful || tasks.empty(), "Return status is inconsistent with the method operation." ); while( !tasks.empty() ) { enqueue_in_graph_arena(this->my_graph, tasks.pop_front()); } fgt_async_try_put_end(this, &port_0); return is_at_least_one_put_successful; } public: template __TBB_requires(async_node_body) __TBB_NOINLINE_SYM async_node( graph &g, size_t concurrency, Body body, Policy = Policy(), node_priority_t a_priority = no_priority ) : base_type( g, concurrency, async_body (body, &my_gateway), a_priority ), my_gateway(self()) { fgt_multioutput_node_with_body<1>( CODEPTR(), FLOW_ASYNC_NODE, &this->my_graph, static_cast *>(this), this->output_ports(), this->my_body ); } template __TBB_requires(async_node_body) __TBB_NOINLINE_SYM async_node(graph& g, size_t concurrency, Body body, node_priority_t a_priority) : async_node(g, concurrency, body, Policy(), a_priority) {} #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template __TBB_requires(async_node_body) __TBB_NOINLINE_SYM async_node( const node_set& nodes, size_t concurrency, Body body, Policy = Policy(), node_priority_t a_priority = no_priority ) : async_node(nodes.graph_reference(), concurrency, body, a_priority) { make_edges_in_order(nodes, *this); } template __TBB_requires(async_node_body) __TBB_NOINLINE_SYM async_node(const node_set& nodes, size_t concurrency, Body body, node_priority_t a_priority) : async_node(nodes, concurrency, body, Policy(), a_priority) {} #endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET __TBB_NOINLINE_SYM async_node( const async_node &other ) : 
base_type(other), sender(), my_gateway(self()) { static_cast(this->my_body->get_body_ptr())->set_gateway(&my_gateway); static_cast(this->my_init_body->get_body_ptr())->set_gateway(&my_gateway); fgt_multioutput_node_with_body<1>( CODEPTR(), FLOW_ASYNC_NODE, &this->my_graph, static_cast *>(this), this->output_ports(), this->my_body ); } gateway_type& gateway() { return my_gateway; } // Define sender< Output > //! Add a new successor to this node bool register_successor(successor_type&) override { __TBB_ASSERT(false, "Successors must be registered only via ports"); return false; } //! Removes a successor from this node bool remove_successor(successor_type&) override { __TBB_ASSERT(false, "Successors must be removed only via ports"); return false; } template Body copy_function_object() { typedef multifunction_body mfn_body_type; typedef async_body async_body_type; mfn_body_type &body_ref = *this->my_body; async_body_type ab = *static_cast(dynamic_cast< multifunction_body_leaf & >(body_ref).get_body_ptr()); return ab.get_body(); } protected: void reset_node( reset_flags f) override { base_type::reset_node(f); } }; #include "detail/_flow_graph_node_set_impl.h" template< typename T > class overwrite_node : public graph_node, public receiver, public sender { public: typedef T input_type; typedef T output_type; typedef typename receiver::predecessor_type predecessor_type; typedef typename sender::successor_type successor_type; __TBB_NOINLINE_SYM explicit overwrite_node(graph &g) : graph_node(g), my_successors(this), my_buffer_is_valid(false) { fgt_node( CODEPTR(), FLOW_OVERWRITE_NODE, &this->my_graph, static_cast *>(this), static_cast *>(this) ); } #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template overwrite_node(const node_set& nodes) : overwrite_node(nodes.graph_reference()) { make_edges_in_order(nodes, *this); } #endif //! 
Copy constructor; doesn't take anything from src; default won't work __TBB_NOINLINE_SYM overwrite_node( const overwrite_node& src ) : overwrite_node(src.my_graph) {} ~overwrite_node() {} bool register_successor( successor_type &s ) override { spin_mutex::scoped_lock l( my_mutex ); if (my_buffer_is_valid && is_graph_active( my_graph )) { // We have a valid value that must be forwarded immediately. bool ret = s.try_put( my_buffer ); if ( ret ) { // We add the successor that accepted our put my_successors.register_successor( s ); } else { // In case of reservation a race between the moment of reservation and register_successor can appear, // because failed reserve does not mean that register_successor is not ready to put a message immediately. // We have some sort of infinite loop: reserving node tries to set pull state for the edge, // but overwrite_node tries to return push state back. That is why we have to break this loop with task creation. small_object_allocator allocator{}; typedef register_predecessor_task task_type; graph_task* t = allocator.new_object(graph_reference(), allocator, *this, s); graph_reference().reserve_wait(); spawn_in_graph_arena( my_graph, *t ); } } else { // No valid value yet, just add as successor my_successors.register_successor( s ); } return true; } bool remove_successor( successor_type &s ) override { spin_mutex::scoped_lock l( my_mutex ); my_successors.remove_successor(s); return true; } bool try_get( input_type &v ) override { spin_mutex::scoped_lock l( my_mutex ); if ( my_buffer_is_valid ) { v = my_buffer; return true; } return false; } //! Reserves an item bool try_reserve( T &v ) override { return try_get(v); } //! Releases the reserved item bool try_release() override { return true; } //! 
Consumes the reserved item bool try_consume() override { return true; } bool is_valid() { spin_mutex::scoped_lock l( my_mutex ); return my_buffer_is_valid; } void clear() { spin_mutex::scoped_lock l( my_mutex ); my_buffer_is_valid = false; } protected: template< typename R, typename B > friend class run_and_put_task; template friend class broadcast_cache; template friend class round_robin_cache; graph_task* try_put_task( const input_type &v ) override { spin_mutex::scoped_lock l( my_mutex ); return try_put_task_impl(v); } graph_task * try_put_task_impl(const input_type &v) { my_buffer = v; my_buffer_is_valid = true; graph_task* rtask = my_successors.try_put_task(v); if (!rtask) rtask = SUCCESSFULLY_ENQUEUED; return rtask; } graph& graph_reference() const override { return my_graph; } //! Breaks an infinite loop between the node reservation and register_successor call struct register_predecessor_task : public graph_task { register_predecessor_task( graph& g, small_object_allocator& allocator, predecessor_type& owner, successor_type& succ) : graph_task(g, allocator), o(owner), s(succ) {}; task* execute(execution_data& ed) override { // TODO revamp: investigate why qualification is needed for register_successor() call using tbb::detail::d1::register_predecessor; using tbb::detail::d1::register_successor; if ( !register_predecessor(s, o) ) { register_successor(o, s); } finalize(ed); return nullptr; } task* cancel(execution_data& ed) override { finalize(ed); return nullptr; } predecessor_type& o; successor_type& s; }; spin_mutex my_mutex; broadcast_cache< input_type, null_rw_mutex > my_successors; input_type my_buffer; bool my_buffer_is_valid; void reset_node( reset_flags f) override { my_buffer_is_valid = false; if (f&rf_clear_edges) { my_successors.clear(); } } }; // overwrite_node template< typename T > class write_once_node : public overwrite_node { public: typedef T input_type; typedef T output_type; typedef overwrite_node base_type; typedef typename 
receiver::predecessor_type predecessor_type; typedef typename sender::successor_type successor_type; //! Constructor __TBB_NOINLINE_SYM explicit write_once_node(graph& g) : base_type(g) { fgt_node( CODEPTR(), FLOW_WRITE_ONCE_NODE, &(this->my_graph), static_cast *>(this), static_cast *>(this) ); } #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template write_once_node(const node_set& nodes) : write_once_node(nodes.graph_reference()) { make_edges_in_order(nodes, *this); } #endif //! Copy constructor: call base class copy constructor __TBB_NOINLINE_SYM write_once_node( const write_once_node& src ) : base_type(src) { fgt_node( CODEPTR(), FLOW_WRITE_ONCE_NODE, &(this->my_graph), static_cast *>(this), static_cast *>(this) ); } protected: template< typename R, typename B > friend class run_and_put_task; template friend class broadcast_cache; template friend class round_robin_cache; graph_task *try_put_task( const T &v ) override { spin_mutex::scoped_lock l( this->my_mutex ); return this->my_buffer_is_valid ? 
NULL : this->try_put_task_impl(v); } }; // write_once_node inline void set_name(const graph& g, const char *name) { fgt_graph_desc(&g, name); } template inline void set_name(const input_node& node, const char *name) { fgt_node_desc(&node, name); } template inline void set_name(const function_node& node, const char *name) { fgt_node_desc(&node, name); } template inline void set_name(const continue_node& node, const char *name) { fgt_node_desc(&node, name); } template inline void set_name(const broadcast_node& node, const char *name) { fgt_node_desc(&node, name); } template inline void set_name(const buffer_node& node, const char *name) { fgt_node_desc(&node, name); } template inline void set_name(const queue_node& node, const char *name) { fgt_node_desc(&node, name); } template inline void set_name(const sequencer_node& node, const char *name) { fgt_node_desc(&node, name); } template inline void set_name(const priority_queue_node& node, const char *name) { fgt_node_desc(&node, name); } template inline void set_name(const limiter_node& node, const char *name) { fgt_node_desc(&node, name); } template inline void set_name(const join_node& node, const char *name) { fgt_node_desc(&node, name); } template inline void set_name(const indexer_node& node, const char *name) { fgt_node_desc(&node, name); } template inline void set_name(const overwrite_node& node, const char *name) { fgt_node_desc(&node, name); } template inline void set_name(const write_once_node& node, const char *name) { fgt_node_desc(&node, name); } template inline void set_name(const multifunction_node& node, const char *name) { fgt_multioutput_node_desc(&node, name); } template inline void set_name(const split_node& node, const char *name) { fgt_multioutput_node_desc(&node, name); } template< typename InputTuple, typename OutputTuple > inline void set_name(const composite_node& node, const char *name) { fgt_multiinput_multioutput_node_desc(&node, name); } template inline void set_name(const async_node& 
node, const char *name) { fgt_multioutput_node_desc(&node, name); } } // d1 } // detail } // tbb // Include deduction guides for node classes #include "detail/_flow_graph_nodes_deduction.h" namespace tbb { namespace flow { inline namespace v1 { using detail::d1::receiver; using detail::d1::sender; using detail::d1::serial; using detail::d1::unlimited; using detail::d1::reset_flags; using detail::d1::rf_reset_protocol; using detail::d1::rf_reset_bodies; using detail::d1::rf_clear_edges; using detail::d1::graph; using detail::d1::graph_node; using detail::d1::continue_msg; using detail::d1::input_node; using detail::d1::function_node; using detail::d1::multifunction_node; using detail::d1::split_node; using detail::d1::output_port; using detail::d1::indexer_node; using detail::d1::tagged_msg; using detail::d1::cast_to; using detail::d1::is_a; using detail::d1::continue_node; using detail::d1::overwrite_node; using detail::d1::write_once_node; using detail::d1::broadcast_node; using detail::d1::buffer_node; using detail::d1::queue_node; using detail::d1::sequencer_node; using detail::d1::priority_queue_node; using detail::d1::limiter_node; using namespace detail::d1::graph_policy_namespace; using detail::d1::join_node; using detail::d1::input_port; using detail::d1::copy_body; using detail::d1::make_edge; using detail::d1::remove_edge; using detail::d1::tag_value; using detail::d1::composite_node; using detail::d1::async_node; using detail::d1::node_priority_t; using detail::d1::no_priority; #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET using detail::d1::follows; using detail::d1::precedes; using detail::d1::make_node_set; using detail::d1::make_edges; #endif } // v1 } // flow using detail::d1::flow_control; namespace profiling { using detail::d1::set_name; } // profiling } // tbb #if TBB_USE_PROFILING_TOOLS && ( __unix__ || __APPLE__ ) // We don't do pragma pop here, since it still gives warning on the USER side #undef __TBB_NOINLINE_SYM #endif #endif // __TBB_flow_graph_H 
flow_graph_abstractions.h000066400000000000000000000026461514453371700336060ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_flow_graph_abstractions_H #define __TBB_flow_graph_abstractions_H namespace tbb { namespace detail { namespace d1 { //! Pure virtual template classes that define interfaces for async communication class graph_proxy { public: //! Inform a graph that messages may come from outside, to prevent premature graph completion virtual void reserve_wait() = 0; //! Inform a graph that a previous call to reserve_wait is no longer in effect virtual void release_wait() = 0; virtual ~graph_proxy() {} }; template class receiver_gateway : public graph_proxy { public: //! Type of inputing data into FG. typedef Input input_type; //! Submit signal from an asynchronous activity to FG. virtual bool try_put(const input_type&) = 0; }; } // d1 } // detail } // tbb #endif level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/global_control.h000066400000000000000000000144151514453371700317560ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_global_control_H #define __TBB_global_control_H #include "detail/_config.h" #include "detail/_assert.h" #include "detail/_attach.h" #include "detail/_exception.h" #include "detail/_namespace_injection.h" #include "detail/_template_helpers.h" #include #include // std::nothrow_t namespace tbb { namespace detail { namespace d1 { class global_control; class task_scheduler_handle; } namespace r1 { TBB_EXPORT void __TBB_EXPORTED_FUNC create(d1::global_control&); TBB_EXPORT void __TBB_EXPORTED_FUNC destroy(d1::global_control&); TBB_EXPORT std::size_t __TBB_EXPORTED_FUNC global_control_active_value(int); struct global_control_impl; struct control_storage_comparator; void release_impl(d1::task_scheduler_handle& handle); bool finalize_impl(d1::task_scheduler_handle& handle); TBB_EXPORT void __TBB_EXPORTED_FUNC get(d1::task_scheduler_handle&); TBB_EXPORT bool __TBB_EXPORTED_FUNC finalize(d1::task_scheduler_handle&, std::intptr_t mode); } namespace d1 { class global_control { public: enum parameter { max_allowed_parallelism, thread_stack_size, terminate_on_exception, scheduler_handle, // not a public parameter parameter_max // insert new parameters above this point }; global_control(parameter p, std::size_t value) : my_value(value), my_reserved(), my_param(p) { suppress_unused_warning(my_reserved); __TBB_ASSERT(my_param < parameter_max, "Invalid parameter"); #if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00) // For Windows 8 Store* apps it's impossible to set stack size if (p==thread_stack_size) return; #elif __TBB_x86_64 && (_WIN32 || _WIN64) if (p==thread_stack_size) 
__TBB_ASSERT_RELEASE((unsigned)value == value, "Stack size is limited to unsigned int range"); #endif if (my_param==max_allowed_parallelism) __TBB_ASSERT_RELEASE(my_value>0, "max_allowed_parallelism cannot be 0."); r1::create(*this); } ~global_control() { __TBB_ASSERT(my_param < parameter_max, "Invalid parameter"); #if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00) // For Windows 8 Store* apps it's impossible to set stack size if (my_param==thread_stack_size) return; #endif r1::destroy(*this); } static std::size_t active_value(parameter p) { __TBB_ASSERT(p < parameter_max, "Invalid parameter"); return r1::global_control_active_value((int)p); } private: std::size_t my_value; std::intptr_t my_reserved; // TODO: substitution of global_control* not to break backward compatibility parameter my_param; friend struct r1::global_control_impl; friend struct r1::control_storage_comparator; }; //! Finalization options. //! Outside of the class to avoid extensive friendship. static constexpr std::intptr_t release_nothrowing = 0; static constexpr std::intptr_t finalize_nothrowing = 1; static constexpr std::intptr_t finalize_throwing = 2; //! User side wrapper for a task scheduler lifetime control object class task_scheduler_handle { public: //! Creates an empty task_scheduler_handle task_scheduler_handle() = default; //! Creates an attached instance of task_scheduler_handle task_scheduler_handle(attach) { r1::get(*this); } //! Release a reference if any ~task_scheduler_handle() { release(); } //! No copy task_scheduler_handle(const task_scheduler_handle& other) = delete; task_scheduler_handle& operator=(const task_scheduler_handle& other) = delete; //! Move only task_scheduler_handle(task_scheduler_handle&& other) noexcept { std::swap(m_ctl, other.m_ctl); } task_scheduler_handle& operator=(task_scheduler_handle&& other) noexcept { std::swap(m_ctl, other.m_ctl); return *this; }; //! 
Checks if the task_scheduler_handle is empty explicit operator bool() const noexcept { return m_ctl != nullptr; } //! Release the reference and deactivate handle void release() { if (m_ctl != nullptr) { r1::finalize(*this, release_nothrowing); m_ctl = nullptr; } } private: friend void r1::release_impl(task_scheduler_handle& handle); friend bool r1::finalize_impl(task_scheduler_handle& handle); friend void __TBB_EXPORTED_FUNC r1::get(task_scheduler_handle&); friend void finalize(task_scheduler_handle&); friend bool finalize(task_scheduler_handle&, const std::nothrow_t&) noexcept; global_control* m_ctl{nullptr}; }; #if TBB_USE_EXCEPTIONS //! Waits for worker threads termination. Throws exception on error. inline void finalize(task_scheduler_handle& handle) { try_call([&] { if (handle.m_ctl != nullptr) { bool finalized = r1::finalize(handle, finalize_throwing); __TBB_ASSERT_EX(finalized, "r1::finalize did not respect finalize_throwing ?"); } }).on_completion([&] { __TBB_ASSERT(!handle, "The handle should be empty after finalize"); }); } #endif //! Waits for worker threads termination. Returns false on error. 
inline bool finalize(task_scheduler_handle& handle, const std::nothrow_t&) noexcept { bool finalized = true; if (handle.m_ctl != nullptr) { finalized = r1::finalize(handle, finalize_nothrowing); } __TBB_ASSERT(!handle, "The handle should be empty after finalize"); return finalized; } } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::global_control; using detail::d1::attach; using detail::d1::finalize; using detail::d1::task_scheduler_handle; using detail::r1::unsafe_wait; } // namespace v1 } // namespace tbb #endif // __TBB_global_control_H level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/info.h000066400000000000000000000077261514453371700277200ustar00rootroot00000000000000/* Copyright (c) 2019-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_info_H #define __TBB_info_H #include "detail/_config.h" #include "detail/_namespace_injection.h" #if __TBB_ARENA_BINDING #include #include namespace tbb { namespace detail { namespace d1{ using numa_node_id = int; using core_type_id = int; // TODO: consider version approach to resolve backward compatibility potential issues. 
struct constraints { #if !__TBB_CPP20_PRESENT constraints(numa_node_id id = -1, int maximal_concurrency = -1) : numa_id(id) , max_concurrency(maximal_concurrency) {} #endif /*!__TBB_CPP20_PRESENT*/ constraints& set_numa_id(numa_node_id id) { numa_id = id; return *this; } constraints& set_max_concurrency(int maximal_concurrency) { max_concurrency = maximal_concurrency; return *this; } #if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT constraints& set_core_type(core_type_id id) { core_type = id; return *this; } constraints& set_max_threads_per_core(int threads_number) { max_threads_per_core = threads_number; return *this; } #endif numa_node_id numa_id = -1; int max_concurrency = -1; #if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT core_type_id core_type = -1; int max_threads_per_core = -1; #endif }; } // namespace d1 namespace r1 { TBB_EXPORT unsigned __TBB_EXPORTED_FUNC numa_node_count(); TBB_EXPORT void __TBB_EXPORTED_FUNC fill_numa_indices(int* index_array); TBB_EXPORT int __TBB_EXPORTED_FUNC numa_default_concurrency(int numa_id); // Reserved fields are required to save binary backward compatibility in case of future changes. // They must be defined to 0 at this moment. 
TBB_EXPORT unsigned __TBB_EXPORTED_FUNC core_type_count(intptr_t reserved = 0); TBB_EXPORT void __TBB_EXPORTED_FUNC fill_core_type_indices(int* index_array, intptr_t reserved = 0); TBB_EXPORT int __TBB_EXPORTED_FUNC constraints_default_concurrency(const d1::constraints& c, intptr_t reserved = 0); TBB_EXPORT int __TBB_EXPORTED_FUNC constraints_threads_per_core(const d1::constraints& c, intptr_t reserved = 0); } // namespace r1 namespace d1 { inline std::vector numa_nodes() { std::vector node_indices(r1::numa_node_count()); r1::fill_numa_indices(node_indices.data()); return node_indices; } inline int default_concurrency(numa_node_id id = -1) { return r1::numa_default_concurrency(id); } #if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT inline std::vector core_types() { std::vector core_type_indexes(r1::core_type_count()); r1::fill_core_type_indices(core_type_indexes.data()); return core_type_indexes; } inline int default_concurrency(constraints c) { if (c.max_concurrency > 0) { return c.max_concurrency; } return r1::constraints_default_concurrency(c); } #endif /*__TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT*/ } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::numa_node_id; #if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT using detail::d1::core_type_id; #endif namespace info { using detail::d1::numa_nodes; #if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT using detail::d1::core_types; #endif using detail::d1::default_concurrency; } // namespace info } // namespace v1 } // namespace tbb #endif /*__TBB_ARENA_BINDING*/ #endif /*__TBB_info_H*/ level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/memory_pool.h000066400000000000000000000233651514453371700313230ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_memory_pool_H #define __TBB_memory_pool_H #if !TBB_PREVIEW_MEMORY_POOL #error Set TBB_PREVIEW_MEMORY_POOL to include memory_pool.h #endif /** @file */ #include "scalable_allocator.h" #include // std::bad_alloc #include // std::runtime_error, std::invalid_argument #include // std::forward #if __TBB_EXTRA_DEBUG #define __TBBMALLOC_ASSERT ASSERT #else #define __TBBMALLOC_ASSERT(a,b) ((void)0) #endif namespace tbb { namespace detail { namespace d1 { //! Base of thread-safe pool allocator for variable-size requests class pool_base : no_copy { // Pool interface is separate from standard allocator classes because it has // to maintain internal state, no copy or assignment. Move and swap are possible. public: //! Reset pool to reuse its memory (free all objects at once) void recycle() { rml::pool_reset(my_pool); } //! The "malloc" analogue to allocate block of memory of size bytes void *malloc(size_t size) { return rml::pool_malloc(my_pool, size); } //! The "free" analogue to discard a previously allocated piece of memory. void free(void* ptr) { rml::pool_free(my_pool, ptr); } //! The "realloc" analogue complementing pool_malloc. // Enables some low-level optimization possibilities void *realloc(void* ptr, size_t size) { return rml::pool_realloc(my_pool, ptr, size); } protected: //! destroy pool - must be called in a child class void destroy() { rml::pool_destroy(my_pool); } rml::MemoryPool *my_pool; }; #if _MSC_VER && !defined(__INTEL_COMPILER) // Workaround for erroneous "unreferenced parameter" warning in method destroy. 
#pragma warning (push) #pragma warning (disable: 4100) #endif //! Meets "allocator" requirements of ISO C++ Standard, Section 20.1.5 /** @ingroup memory_allocation */ template class memory_pool_allocator { protected: typedef P pool_type; pool_type *my_pool; template friend class memory_pool_allocator; template friend bool operator==( const memory_pool_allocator& a, const memory_pool_allocator& b); template friend bool operator!=( const memory_pool_allocator& a, const memory_pool_allocator& b); public: typedef T value_type; typedef value_type* pointer; typedef const value_type* const_pointer; typedef value_type& reference; typedef const value_type& const_reference; typedef size_t size_type; typedef ptrdiff_t difference_type; template struct rebind { typedef memory_pool_allocator other; }; explicit memory_pool_allocator(pool_type &pool) throw() : my_pool(&pool) {} memory_pool_allocator(const memory_pool_allocator& src) throw() : my_pool(src.my_pool) {} template memory_pool_allocator(const memory_pool_allocator& src) throw() : my_pool(src.my_pool) {} pointer address(reference x) const { return &x; } const_pointer address(const_reference x) const { return &x; } //! Allocate space for n objects. pointer allocate( size_type n, const void* /*hint*/ = 0) { pointer p = static_cast( my_pool->malloc( n*sizeof(value_type) ) ); if (!p) throw_exception(std::bad_alloc()); return p; } //! Free previously allocated block of memory. void deallocate( pointer p, size_type ) { my_pool->free(p); } //! Largest value for which method allocate might succeed. size_type max_size() const throw() { size_type max = static_cast(-1) / sizeof (value_type); return (max > 0 ? max : 1); } //! Copy-construct value at location pointed to by p. template void construct(U *p, Args&&... args) { ::new((void *)p) U(std::forward(args)...); } //! Destroy value at location pointed to by p. 
void destroy( pointer p ) { p->~value_type(); } }; #if _MSC_VER && !defined(__INTEL_COMPILER) #pragma warning (pop) #endif // warning 4100 is back //! Analogous to std::allocator, as defined in ISO C++ Standard, Section 20.4.1 /** @ingroup memory_allocation */ template class memory_pool_allocator { public: typedef P pool_type; typedef void* pointer; typedef const void* const_pointer; typedef void value_type; template struct rebind { typedef memory_pool_allocator other; }; explicit memory_pool_allocator( pool_type &pool) throw() : my_pool(&pool) {} memory_pool_allocator( const memory_pool_allocator& src) throw() : my_pool(src.my_pool) {} template memory_pool_allocator(const memory_pool_allocator& src) throw() : my_pool(src.my_pool) {} protected: pool_type *my_pool; template friend class memory_pool_allocator; template friend bool operator==( const memory_pool_allocator& a, const memory_pool_allocator& b); template friend bool operator!=( const memory_pool_allocator& a, const memory_pool_allocator& b); }; template inline bool operator==( const memory_pool_allocator& a, const memory_pool_allocator& b) {return a.my_pool==b.my_pool;} template inline bool operator!=( const memory_pool_allocator& a, const memory_pool_allocator& b) {return a.my_pool!=b.my_pool;} //! Thread-safe growable pool allocator for variable-size requests template class memory_pool : public pool_base { Alloc my_alloc; // TODO: base-class optimization static void *allocate_request(intptr_t pool_id, size_t & bytes); static int deallocate_request(intptr_t pool_id, void*, size_t raw_bytes); public: //! construct pool with underlying allocator explicit memory_pool(const Alloc &src = Alloc()); //! destroy pool ~memory_pool() { destroy(); } // call the callbacks first and destroy my_alloc latter }; class fixed_pool : public pool_base { void *my_buffer; size_t my_size; inline static void *allocate_request(intptr_t pool_id, size_t & bytes); public: //! 
construct pool with underlying allocator inline fixed_pool(void *buf, size_t size); //! destroy pool ~fixed_pool() { destroy(); } }; //////////////// Implementation /////////////// template memory_pool::memory_pool(const Alloc &src) : my_alloc(src) { rml::MemPoolPolicy args(allocate_request, deallocate_request, sizeof(typename Alloc::value_type)); rml::MemPoolError res = rml::pool_create_v1(intptr_t(this), &args, &my_pool); if (res!=rml::POOL_OK) throw_exception(std::runtime_error("Can't create pool")); } template void *memory_pool::allocate_request(intptr_t pool_id, size_t & bytes) { memory_pool &self = *reinterpret_cast*>(pool_id); const size_t unit_size = sizeof(typename Alloc::value_type); __TBBMALLOC_ASSERT( 0 == bytes%unit_size, NULL); void *ptr; #if TBB_USE_EXCEPTIONS try { #endif ptr = self.my_alloc.allocate( bytes/unit_size ); #if TBB_USE_EXCEPTIONS } catch(...) { return 0; } #endif return ptr; } #if __TBB_MSVC_UNREACHABLE_CODE_IGNORED // Workaround for erroneous "unreachable code" warning in the template below. 
// Specific for VC++ 17-18 compiler #pragma warning (push) #pragma warning (disable: 4702) #endif template int memory_pool::deallocate_request(intptr_t pool_id, void* raw_ptr, size_t raw_bytes) { memory_pool &self = *reinterpret_cast*>(pool_id); const size_t unit_size = sizeof(typename Alloc::value_type); __TBBMALLOC_ASSERT( 0 == raw_bytes%unit_size, NULL); self.my_alloc.deallocate( static_cast(raw_ptr), raw_bytes/unit_size ); return 0; } #if __TBB_MSVC_UNREACHABLE_CODE_IGNORED #pragma warning (pop) #endif inline fixed_pool::fixed_pool(void *buf, size_t size) : my_buffer(buf), my_size(size) { if (!buf || !size) // TODO: improve support for mode with exceptions disabled throw_exception(std::invalid_argument("Zero in parameter is invalid")); rml::MemPoolPolicy args(allocate_request, 0, size, /*fixedPool=*/true); rml::MemPoolError res = rml::pool_create_v1(intptr_t(this), &args, &my_pool); if (res!=rml::POOL_OK) throw_exception(std::runtime_error("Can't create pool")); } inline void *fixed_pool::allocate_request(intptr_t pool_id, size_t & bytes) { fixed_pool &self = *reinterpret_cast(pool_id); __TBBMALLOC_ASSERT(0 != self.my_size, "The buffer must not be used twice."); bytes = self.my_size; self.my_size = 0; // remember that buffer has been used return self.my_buffer; } } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::memory_pool_allocator; using detail::d1::memory_pool; using detail::d1::fixed_pool; } // inline namepspace v1 } // namespace tbb #undef __TBBMALLOC_ASSERT #endif// __TBB_memory_pool_H level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/mutex.h000066400000000000000000000050051514453371700301130ustar00rootroot00000000000000/* Copyright (c) 2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_mutex_H #define __TBB_mutex_H #include "detail/_namespace_injection.h" #include "detail/_utils.h" #include "detail/_scoped_lock.h" #include "detail/_waitable_atomic.h" #include "detail/_mutex_common.h" #include "profiling.h" namespace tbb { namespace detail { namespace d1 { class mutex { public: //! Constructors mutex() { create_itt_sync(this, "tbb::mutex", ""); }; //! Destructor ~mutex() { __TBB_ASSERT(!my_flag.load(std::memory_order_relaxed), "destruction of an acquired mutex"); } //! No Copy mutex(const mutex&) = delete; mutex& operator=(const mutex&) = delete; using scoped_lock = unique_scoped_lock; //! Mutex traits static constexpr bool is_rw_mutex = false; static constexpr bool is_recursive_mutex = false; static constexpr bool is_fair_mutex = false; //! Acquire lock /** Spin if the lock is taken */ void lock() { call_itt_notify(prepare, this); while (!try_lock()) { my_flag.wait(true, /* context = */ 0, std::memory_order_relaxed); } } //! Try acquiring lock (non-blocking) /** Return true if lock acquired; false otherwise. */ bool try_lock() { bool result = !my_flag.load(std::memory_order_relaxed) && !my_flag.exchange(true); if (result) { call_itt_notify(acquired, this); } return result; } //! Release lock void unlock() { call_itt_notify(releasing, this); // We need Write Read memory barrier before notify that reads the waiter list. // In C++ only full fence covers this type of barrier. 
my_flag.exchange(false); my_flag.notify_one_relaxed(); } private: waitable_atomic my_flag{0}; }; // class mutex } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::mutex; } // namespace v1 } // namespace tbb #endif // __TBB_mutex_H level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/null_mutex.h000066400000000000000000000041451514453371700311510ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_null_mutex_H #define __TBB_null_mutex_H #include "detail/_config.h" #include "detail/_namespace_injection.h" #include "detail/_mutex_common.h" namespace tbb { namespace detail { namespace d1 { //! A mutex which does nothing /** A null_mutex does no operation and simulates success. @ingroup synchronization */ class null_mutex { public: //! Constructors constexpr null_mutex() noexcept = default; //! Destructor ~null_mutex() = default; //! No Copy null_mutex(const null_mutex&) = delete; null_mutex& operator=(const null_mutex&) = delete; //! Represents acquisition of a mutex. class scoped_lock { public: //! Constructors constexpr scoped_lock() noexcept = default; scoped_lock(null_mutex&) {} //! Destructor ~scoped_lock() = default; //! No Copy scoped_lock(const scoped_lock&) = delete; scoped_lock& operator=(const scoped_lock&) = delete; void acquire(null_mutex&) {} bool try_acquire(null_mutex&) { return true; } void release() {} }; //! 
Mutex traits static constexpr bool is_rw_mutex = false; static constexpr bool is_recursive_mutex = true; static constexpr bool is_fair_mutex = true; void lock() {} bool try_lock() { return true; } void unlock() {} }; // class null_mutex } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::null_mutex; } // namespace v1 } // namespace tbb #endif /* __TBB_null_mutex_H */ level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/null_rw_mutex.h000066400000000000000000000047221514453371700316620ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_null_rw_mutex_H #define __TBB_null_rw_mutex_H #include "detail/_config.h" #include "detail/_namespace_injection.h" #include "detail/_mutex_common.h" namespace tbb { namespace detail { namespace d1 { //! A rw mutex which does nothing /** A null_rw_mutex is a rw mutex that does nothing and simulates successful operation. @ingroup synchronization */ class null_rw_mutex { public: //! Constructors constexpr null_rw_mutex() noexcept = default; //! Destructor ~null_rw_mutex() = default; //! No Copy null_rw_mutex(const null_rw_mutex&) = delete; null_rw_mutex& operator=(const null_rw_mutex&) = delete; //! Represents acquisition of a mutex. class scoped_lock { public: //! Constructors constexpr scoped_lock() noexcept = default; scoped_lock(null_rw_mutex&, bool = true) {} //! Destructor ~scoped_lock() = default; //! 
No Copy scoped_lock(const scoped_lock&) = delete; scoped_lock& operator=(const scoped_lock&) = delete; void acquire(null_rw_mutex&, bool = true) {} bool try_acquire(null_rw_mutex&, bool = true) { return true; } void release() {} bool upgrade_to_writer() { return true; } bool downgrade_to_reader() { return true; } bool is_writer() const { return true; } }; //! Mutex traits static constexpr bool is_rw_mutex = true; static constexpr bool is_recursive_mutex = true; static constexpr bool is_fair_mutex = true; void lock() {} bool try_lock() { return true; } void unlock() {} void lock_shared() {} bool try_lock_shared() { return true; } void unlock_shared() {} }; // class null_rw_mutex } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::null_rw_mutex; } // namespace v1 } // namespace tbb #endif /* __TBB_null_rw_mutex_H */ level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/parallel_for.h000066400000000000000000000556331514453371700314270ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #ifndef __TBB_parallel_for_H #define __TBB_parallel_for_H #include "detail/_config.h" #include "detail/_namespace_injection.h" #include "detail/_exception.h" #include "detail/_task.h" #include "detail/_small_object_pool.h" #include "profiling.h" #include "partitioner.h" #include "blocked_range.h" #include "task_group.h" #include #include namespace tbb { namespace detail { #if __TBB_CPP20_CONCEPTS_PRESENT inline namespace d0 { template concept parallel_for_body = std::copy_constructible && requires( const std::remove_reference_t& body, Range& range ) { body(range); }; template concept parallel_for_index = std::constructible_from && std::copyable && requires( const std::remove_reference_t& lhs, const std::remove_reference_t& rhs ) { { lhs < rhs } -> adaptive_same_as; { lhs - rhs } -> std::convertible_to; { lhs + (rhs - lhs) } -> std::convertible_to; }; template concept parallel_for_function = requires( const std::remove_reference_t& func, Index index ) { func(index); }; } // namespace d0 #endif // __TBB_CPP20_CONCEPTS_PRESENT namespace d1 { //! Task type used in parallel_for /** @ingroup algorithms */ template struct start_for : public task { Range my_range; const Body my_body; node* my_parent; typename Partitioner::task_partition_type my_partition; small_object_allocator my_allocator; task* execute(execution_data&) override; task* cancel(execution_data&) override; void finalize(const execution_data&); //! Constructor for root task. start_for( const Range& range, const Body& body, Partitioner& partitioner, small_object_allocator& alloc ) : my_range(range), my_body(body), my_partition(partitioner), my_allocator(alloc) {} //! Splitting constructor used to generate children. /** parent_ becomes left child. Newly constructed object is right child. 
*/ start_for( start_for& parent_, typename Partitioner::split_type& split_obj, small_object_allocator& alloc ) : my_range(parent_.my_range, get_range_split_object(split_obj)), my_body(parent_.my_body), my_partition(parent_.my_partition, split_obj), my_allocator(alloc) {} //! Construct right child from the given range as response to the demand. /** parent_ remains left child. Newly constructed object is right child. */ start_for( start_for& parent_, const Range& r, depth_t d, small_object_allocator& alloc ) : my_range(r), my_body(parent_.my_body), my_partition(parent_.my_partition, split()), my_allocator(alloc) { my_partition.align_depth( d ); } static void run(const Range& range, const Body& body, Partitioner& partitioner) { task_group_context context(PARALLEL_FOR); run(range, body, partitioner, context); } static void run(const Range& range, const Body& body, Partitioner& partitioner, task_group_context& context) { if ( !range.empty() ) { small_object_allocator alloc{}; start_for& for_task = *alloc.new_object(range, body, partitioner, alloc); // defer creation of the wait node until task allocation succeeds wait_node wn; for_task.my_parent = &wn; execute_and_wait(for_task, context, wn.m_wait, context); } } //! Run body for range, serves as callback for partitioner void run_body( Range &r ) { my_body( r ); } //! spawn right task, serves as callback for partitioner void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) { offer_work_impl(ed, *this, split_obj); } //! spawn right task, serves as callback for partitioner void offer_work(const Range& r, depth_t d, execution_data& ed) { offer_work_impl(ed, *this, r, d); } private: template void offer_work_impl(execution_data& ed, Args&&... constructor_args) { // New right child small_object_allocator alloc{}; start_for& right_child = *alloc.new_object(ed, std::forward(constructor_args)..., alloc); // New root node as a continuation and ref count. Left and right child attach to the new parent. 
right_child.my_parent = my_parent = alloc.new_object(ed, my_parent, 2, alloc); // Spawn the right sibling right_child.spawn_self(ed); } void spawn_self(execution_data& ed) { my_partition.spawn_task(*this, *context(ed)); } }; //! fold the tree and deallocate the task template void start_for::finalize(const execution_data& ed) { // Get the current parent and allocator an object destruction node* parent = my_parent; auto allocator = my_allocator; // Task execution finished - destroy it this->~start_for(); // Unwind the tree decrementing the parent`s reference count fold_tree(parent, ed); allocator.deallocate(this, ed); } //! execute task for parallel_for template task* start_for::execute(execution_data& ed) { if (!is_same_affinity(ed)) { my_partition.note_affinity(execution_slot(ed)); } my_partition.check_being_stolen(*this, ed); my_partition.execute(*this, my_range, ed); finalize(ed); return nullptr; } //! cancel task for parallel_for template task* start_for::cancel(execution_data& ed) { finalize(ed); return nullptr; } //! Calls the function with values from range [begin, end) with a step provided template class parallel_for_body_wrapper : detail::no_assign { const Function &my_func; const Index my_begin; const Index my_step; public: parallel_for_body_wrapper( const Function& _func, Index& _begin, Index& _step ) : my_func(_func), my_begin(_begin), my_step(_step) {} void operator()( const blocked_range& r ) const { // A set of local variables to help the compiler with vectorization of the following loop. 
Index b = r.begin(); Index e = r.end(); Index ms = my_step; Index k = my_begin + b*ms; #if __INTEL_COMPILER #pragma ivdep #if __TBB_ASSERT_ON_VECTORIZATION_FAILURE #pragma vector always assert #endif #endif for ( Index i = b; i < e; ++i, k += ms ) { my_func( k ); } } }; // Requirements on Range concept are documented in blocked_range.h /** \page parallel_for_body_req Requirements on parallel_for body Class \c Body implementing the concept of parallel_for body must define: - \code Body::Body( const Body& ); \endcode Copy constructor - \code Body::~Body(); \endcode Destructor - \code void Body::operator()( Range& r ) const; \endcode Function call operator applying the body to range \c r. **/ /** \name parallel_for See also requirements on \ref range_req "Range" and \ref parallel_for_body_req "parallel_for Body". **/ //@{ //! Parallel iteration over range with default partitioner. /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_for_body) void parallel_for( const Range& range, const Body& body ) { start_for::run(range,body,__TBB_DEFAULT_PARTITIONER()); } //! Parallel iteration over range with simple partitioner. /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_for_body) void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner ) { start_for::run(range,body,partitioner); } //! Parallel iteration over range with auto_partitioner. /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_for_body) void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner ) { start_for::run(range,body,partitioner); } //! Parallel iteration over range with static_partitioner. /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_for_body) void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner ) { start_for::run(range,body,partitioner); } //! 
Parallel iteration over range with affinity_partitioner. /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_for_body) void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner ) { start_for::run(range,body,partitioner); } //! Parallel iteration over range with default partitioner and user-supplied context. /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_for_body) void parallel_for( const Range& range, const Body& body, task_group_context& context ) { start_for::run(range, body, __TBB_DEFAULT_PARTITIONER(), context); } //! Parallel iteration over range with simple partitioner and user-supplied context. /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_for_body) void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner, task_group_context& context ) { start_for::run(range, body, partitioner, context); } //! Parallel iteration over range with auto_partitioner and user-supplied context. /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_for_body) void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner, task_group_context& context ) { start_for::run(range, body, partitioner, context); } //! Parallel iteration over range with static_partitioner and user-supplied context. /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_for_body) void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner, task_group_context& context ) { start_for::run(range, body, partitioner, context); } //! Parallel iteration over range with affinity_partitioner and user-supplied context. 
/** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_for_body) void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner, task_group_context& context ) { start_for::run(range,body,partitioner, context); } //! Implementation of parallel iteration over stepped range of integers with explicit step and partitioner template void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner) { if (step <= 0 ) throw_exception(exception_id::nonpositive_step); // throws std::invalid_argument else if (first < last) { // Above "else" avoids "potential divide by zero" warning on some platforms Index end = (last - first - Index(1)) / step + Index(1); blocked_range range(static_cast(0), end); parallel_for_body_wrapper body(f, first, step); parallel_for(range, body, partitioner); } } //! Parallel iteration over a range of integers with a step provided and default partitioner template __TBB_requires(parallel_for_index && parallel_for_function) void parallel_for(Index first, Index last, Index step, const Function& f) { parallel_for_impl(first, last, step, f, auto_partitioner()); } //! Parallel iteration over a range of integers with a step provided and simple partitioner template __TBB_requires(parallel_for_index && parallel_for_function) void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner) { parallel_for_impl(first, last, step, f, partitioner); } //! Parallel iteration over a range of integers with a step provided and auto partitioner template __TBB_requires(parallel_for_index && parallel_for_function) void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner) { parallel_for_impl(first, last, step, f, partitioner); } //! 
Parallel iteration over a range of integers with a step provided and static partitioner template __TBB_requires(parallel_for_index && parallel_for_function) void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner) { parallel_for_impl(first, last, step, f, partitioner); } //! Parallel iteration over a range of integers with a step provided and affinity partitioner template __TBB_requires(parallel_for_index && parallel_for_function) void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner) { parallel_for_impl(first, last, step, f, partitioner); } //! Parallel iteration over a range of integers with a default step value and default partitioner template __TBB_requires(parallel_for_index && parallel_for_function) void parallel_for(Index first, Index last, const Function& f) { parallel_for_impl(first, last, static_cast(1), f, auto_partitioner()); } //! Parallel iteration over a range of integers with a default step value and simple partitioner template __TBB_requires(parallel_for_index && parallel_for_function) void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner) { parallel_for_impl(first, last, static_cast(1), f, partitioner); } //! Parallel iteration over a range of integers with a default step value and auto partitioner template __TBB_requires(parallel_for_index && parallel_for_function) void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner) { parallel_for_impl(first, last, static_cast(1), f, partitioner); } //! Parallel iteration over a range of integers with a default step value and static partitioner template __TBB_requires(parallel_for_index && parallel_for_function) void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner) { parallel_for_impl(first, last, static_cast(1), f, partitioner); } //! 
Parallel iteration over a range of integers with a default step value and affinity partitioner template __TBB_requires(parallel_for_index && parallel_for_function) void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner) { parallel_for_impl(first, last, static_cast(1), f, partitioner); } //! Implementation of parallel iteration over stepped range of integers with explicit step, task group context, and partitioner template void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner, task_group_context &context) { if (step <= 0 ) throw_exception(exception_id::nonpositive_step); // throws std::invalid_argument else if (first < last) { // Above "else" avoids "potential divide by zero" warning on some platforms Index end = (last - first - Index(1)) / step + Index(1); blocked_range range(static_cast(0), end); parallel_for_body_wrapper body(f, first, step); parallel_for(range, body, partitioner, context); } } //! Parallel iteration over a range of integers with explicit step, task group context, and default partitioner template __TBB_requires(parallel_for_index && parallel_for_function) void parallel_for(Index first, Index last, Index step, const Function& f, task_group_context &context) { parallel_for_impl(first, last, step, f, auto_partitioner(), context); } //! Parallel iteration over a range of integers with explicit step, task group context, and simple partitioner template __TBB_requires(parallel_for_index && parallel_for_function) void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner, task_group_context &context) { parallel_for_impl(first, last, step, f, partitioner, context); } //! 
Parallel iteration over a range of integers with explicit step, task group context, and auto partitioner template __TBB_requires(parallel_for_index && parallel_for_function) void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner, task_group_context &context) { parallel_for_impl(first, last, step, f, partitioner, context); } //! Parallel iteration over a range of integers with explicit step, task group context, and static partitioner template __TBB_requires(parallel_for_index && parallel_for_function) void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner, task_group_context &context) { parallel_for_impl(first, last, step, f, partitioner, context); } //! Parallel iteration over a range of integers with explicit step, task group context, and affinity partitioner template __TBB_requires(parallel_for_index && parallel_for_function) void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner, task_group_context &context) { parallel_for_impl(first, last, step, f, partitioner, context); } //! Parallel iteration over a range of integers with a default step value, explicit task group context, and default partitioner template __TBB_requires(parallel_for_index && parallel_for_function) void parallel_for(Index first, Index last, const Function& f, task_group_context &context) { parallel_for_impl(first, last, static_cast(1), f, auto_partitioner(), context); } //! Parallel iteration over a range of integers with a default step value, explicit task group context, and simple partitioner template __TBB_requires(parallel_for_index && parallel_for_function) void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner, task_group_context &context) { parallel_for_impl(first, last, static_cast(1), f, partitioner, context); } //! 
Parallel iteration over a range of integers with a default step value, explicit task group context, and auto partitioner template __TBB_requires(parallel_for_index && parallel_for_function) void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner, task_group_context &context) { parallel_for_impl(first, last, static_cast(1), f, partitioner, context); } //! Parallel iteration over a range of integers with a default step value, explicit task group context, and static partitioner template __TBB_requires(parallel_for_index && parallel_for_function) void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner, task_group_context &context) { parallel_for_impl(first, last, static_cast(1), f, partitioner, context); } //! Parallel iteration over a range of integers with a default step value, explicit task group context, and affinity_partitioner template __TBB_requires(parallel_for_index && parallel_for_function) void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner, task_group_context &context) { parallel_for_impl(first, last, static_cast(1), f, partitioner, context); } // @} } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::parallel_for; // Split types using detail::split; using detail::proportional_split; } // namespace v1 } // namespace tbb #endif /* __TBB_parallel_for_H */ level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/parallel_for_each.h000066400000000000000000000646701514453371700324100ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_parallel_for_each_H #define __TBB_parallel_for_each_H #include "detail/_config.h" #include "detail/_namespace_injection.h" #include "detail/_exception.h" #include "detail/_task.h" #include "detail/_aligned_space.h" #include "detail/_small_object_pool.h" #include "parallel_for.h" #include "task_group.h" // task_group_context #include #include namespace tbb { namespace detail { #if __TBB_CPP20_CONCEPTS_PRESENT namespace d1 { template class feeder; } // namespace d1 inline namespace d0 { template concept parallel_for_each_body = requires( const std::remove_reference_t& body, ItemType&& item ) { body(std::forward(item)); } || requires( const std::remove_reference_t& body, ItemType&& item, tbb::detail::d1::feeder& feeder ) { body(std::forward(item), feeder); }; } // namespace d0 #endif // __TBB_CPP20_CONCEPTS_PRESENT namespace d2 { template class feeder_impl; } // namespace d2 namespace d1 { //! Class the user supplied algorithm body uses to add new tasks template class feeder { feeder() {} feeder(const feeder&) = delete; void operator=( const feeder&) = delete; virtual ~feeder () {} virtual void internal_add_copy(const Item& item) = 0; virtual void internal_add_move(Item&& item) = 0; template friend class d2::feeder_impl; public: //! Add a work item to a running parallel_for_each. void add(const Item& item) {internal_add_copy(item);} void add(Item&& item) {internal_add_move(std::move(item));} }; } // namespace d1 namespace d2 { using namespace tbb::detail::d1; /** Selects one of the two possible forms of function call member operator. 
@ingroup algorithms **/ template struct parallel_for_each_operator_selector { public: template static auto call(const Body& body, ItemArg&& item, FeederArg*) -> decltype(body(std::forward(item)), void()) { #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) // Suppression of Microsoft non-standard extension warnings #pragma warning (push) #pragma warning (disable: 4239) #endif body(std::forward(item)); #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) #pragma warning (push) #endif } template static auto call(const Body& body, ItemArg&& item, FeederArg* feeder) -> decltype(body(std::forward(item), *feeder), void()) { #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) // Suppression of Microsoft non-standard extension warnings #pragma warning (push) #pragma warning (disable: 4239) #endif __TBB_ASSERT(feeder, "Feeder was not created but should be"); body(std::forward(item), *feeder); #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) #pragma warning (push) #endif } }; template struct feeder_item_task: public task { using feeder_type = feeder_impl; template feeder_item_task(ItemType&& input_item, feeder_type& feeder, small_object_allocator& alloc) : item(std::forward(input_item)), my_feeder(feeder), my_allocator(alloc) {} void finalize(const execution_data& ed) { my_feeder.my_wait_context.release(); my_allocator.delete_object(this, ed); } //! Hack for resolve ambiguity between calls to the body with and without moving the stored copy //! 
Executing body with moving the copy should have higher priority using first_priority = int; using second_priority = double; template static auto call(const BodyType& call_body, ItemType& call_item, FeederType& call_feeder, first_priority) -> decltype(parallel_for_each_operator_selector::call(call_body, std::move(call_item), &call_feeder), void()) { parallel_for_each_operator_selector::call(call_body, std::move(call_item), &call_feeder); } template static void call(const BodyType& call_body, ItemType& call_item, FeederType& call_feeder, second_priority) { parallel_for_each_operator_selector::call(call_body, call_item, &call_feeder); } task* execute(execution_data& ed) override { call(my_feeder.my_body, item, my_feeder, first_priority{}); finalize(ed); return nullptr; } task* cancel(execution_data& ed) override { finalize(ed); return nullptr; } Item item; feeder_type& my_feeder; small_object_allocator my_allocator; }; // class feeder_item_task /** Implements new task adding procedure. @ingroup algorithms **/ template class feeder_impl : public feeder { // Avoiding use of copy constructor in a virtual method if the type does not support it void internal_add_copy_impl(std::true_type, const Item& item) { using feeder_task = feeder_item_task; small_object_allocator alloc; auto task = alloc.new_object(item, *this, alloc); my_wait_context.reserve(); spawn(*task, my_execution_context); } void internal_add_copy_impl(std::false_type, const Item&) { __TBB_ASSERT(false, "Overloading for r-value reference doesn't work or it's not movable and not copyable object"); } void internal_add_copy(const Item& item) override { internal_add_copy_impl(typename std::is_copy_constructible::type(), item); } void internal_add_move(Item&& item) override { using feeder_task = feeder_item_task; small_object_allocator alloc{}; auto task = alloc.new_object(std::move(item), *this, alloc); my_wait_context.reserve(); spawn(*task, my_execution_context); } public: feeder_impl(const Body& body, 
wait_context& w_context, task_group_context &context) : my_body(body), my_wait_context(w_context) , my_execution_context(context) {} const Body& my_body; wait_context& my_wait_context; task_group_context& my_execution_context; }; // class feeder_impl /** Execute computation under one element of the range @ingroup algorithms **/ template struct for_each_iteration_task: public task { using feeder_type = feeder_impl; for_each_iteration_task(Iterator input_item_ptr, const Body& body, feeder_impl* feeder_ptr, wait_context& wait_context) : item_ptr(input_item_ptr), my_body(body), my_feeder_ptr(feeder_ptr), parent_wait_context(wait_context) {} void finalize() { parent_wait_context.release(); } task* execute(execution_data&) override { parallel_for_each_operator_selector::call(my_body, *item_ptr, my_feeder_ptr); finalize(); return nullptr; } task* cancel(execution_data&) override { finalize(); return nullptr; } Iterator item_ptr; const Body& my_body; feeder_impl* my_feeder_ptr; wait_context& parent_wait_context; }; // class for_each_iteration_task // Helper to get the type of the iterator to the internal sequence of copies // If the element can be passed to the body as an rvalue - this iterator should be move_iterator template struct input_iteration_task_iterator_helper { // For input iterators we pass const lvalue reference to the body // It is prohibited to take non-constant lvalue references for input iterators using type = const Item*; }; template struct input_iteration_task_iterator_helper::call(std::declval(), std::declval(), std::declval*>()))>> { using type = std::move_iterator; }; /** Split one block task to several(max_block_size) iteration tasks for input iterators @ingroup algorithms **/ template struct input_block_handling_task : public task { static constexpr size_t max_block_size = 4; using feeder_type = feeder_impl; using iteration_task_iterator_type = typename input_iteration_task_iterator_helper::type; using iteration_task = for_each_iteration_task; 
input_block_handling_task(wait_context& root_wait_context, task_group_context& e_context, const Body& body, feeder_impl* feeder_ptr, small_object_allocator& alloc) :my_size(0), my_wait_context(0), my_root_wait_context(root_wait_context), my_execution_context(e_context), my_allocator(alloc) { auto item_it = block_iteration_space.begin(); for (auto* it = task_pool.begin(); it != task_pool.end(); ++it) { new (it) iteration_task(iteration_task_iterator_type(item_it++), body, feeder_ptr, my_wait_context); } } void finalize(const execution_data& ed) { my_root_wait_context.release(); my_allocator.delete_object(this, ed); } task* execute(execution_data& ed) override { __TBB_ASSERT( my_size > 0, "Negative size was passed to task"); for (std::size_t counter = 1; counter < my_size; ++counter) { my_wait_context.reserve(); spawn(*(task_pool.begin() + counter), my_execution_context); } my_wait_context.reserve(); execute_and_wait(*task_pool.begin(), my_execution_context, my_wait_context, my_execution_context); // deallocate current task after children execution finalize(ed); return nullptr; } task* cancel(execution_data& ed) override { finalize(ed); return nullptr; } ~input_block_handling_task() { for(std::size_t counter = 0; counter < max_block_size; ++counter) { (task_pool.begin() + counter)->~iteration_task(); (block_iteration_space.begin() + counter)->~Item(); } } aligned_space block_iteration_space; aligned_space task_pool; std::size_t my_size; wait_context my_wait_context; wait_context& my_root_wait_context; task_group_context& my_execution_context; small_object_allocator my_allocator; }; // class input_block_handling_task /** Split one block task to several(max_block_size) iteration tasks for forward iterators @ingroup algorithms **/ template struct forward_block_handling_task : public task { static constexpr size_t max_block_size = 4; using iteration_task = for_each_iteration_task; forward_block_handling_task(Iterator first, std::size_t size, wait_context& w_context, 
task_group_context& e_context, const Body& body, feeder_impl* feeder_ptr, small_object_allocator& alloc) : my_size(size), my_wait_context(0), my_root_wait_context(w_context), my_execution_context(e_context), my_allocator(alloc) { auto* task_it = task_pool.begin(); for (std::size_t i = 0; i < size; i++) { new (task_it++) iteration_task(first, body, feeder_ptr, my_wait_context); ++first; } } void finalize(const execution_data& ed) { my_root_wait_context.release(); my_allocator.delete_object(this, ed); } task* execute(execution_data& ed) override { __TBB_ASSERT( my_size > 0, "Negative size was passed to task"); for(std::size_t counter = 1; counter < my_size; ++counter) { my_wait_context.reserve(); spawn(*(task_pool.begin() + counter), my_execution_context); } my_wait_context.reserve(); execute_and_wait(*task_pool.begin(), my_execution_context, my_wait_context, my_execution_context); // deallocate current task after children execution finalize(ed); return nullptr; } task* cancel(execution_data& ed) override { finalize(ed); return nullptr; } ~forward_block_handling_task() { for(std::size_t counter = 0; counter < my_size; ++counter) { (task_pool.begin() + counter)->~iteration_task(); } } aligned_space task_pool; std::size_t my_size; wait_context my_wait_context; wait_context& my_root_wait_context; task_group_context& my_execution_context; small_object_allocator my_allocator; }; // class forward_block_handling_task /** Body for parallel_for algorithm. * Allows to redirect operations under random access iterators range to the parallel_for algorithm. 
@ingroup algorithms **/ template class parallel_for_body_wrapper { Iterator my_first; const Body& my_body; feeder_impl* my_feeder_ptr; public: parallel_for_body_wrapper(Iterator first, const Body& body, feeder_impl* feeder_ptr) : my_first(first), my_body(body), my_feeder_ptr(feeder_ptr) {} void operator()(tbb::blocked_range range) const { #if __INTEL_COMPILER #pragma ivdep #endif for (std::size_t count = range.begin(); count != range.end(); count++) { parallel_for_each_operator_selector::call(my_body, *(my_first + count), my_feeder_ptr); } } }; // class parallel_for_body_wrapper /** Helper for getting iterators tag including inherited custom tags @ingroup algorithms */ template using tag = typename std::iterator_traits::iterator_category; template using iterator_tag_dispatch = typename std::conditional< std::is_base_of>::value, std::random_access_iterator_tag, typename std::conditional< std::is_base_of>::value, std::forward_iterator_tag, std::input_iterator_tag >::type >::type; template using feeder_is_required = tbb::detail::void_t()(std::declval::reference>(), std::declval&>()))>; // Creates feeder object only if the body can accept it template struct feeder_holder { feeder_holder( wait_context&, task_group_context&, const Body& ) {} feeder_impl* feeder_ptr() { return nullptr; } }; // class feeder_holder template class feeder_holder> { public: feeder_holder( wait_context& w_context, task_group_context& context, const Body& body ) : my_feeder(body, w_context, context) {} feeder_impl* feeder_ptr() { return &my_feeder; } private: feeder_impl my_feeder; }; // class feeder_holder template class for_each_root_task_base : public task { public: for_each_root_task_base(Iterator first, Iterator last, const Body& body, wait_context& w_context, task_group_context& e_context) : my_first(first), my_last(last), my_wait_context(w_context), my_execution_context(e_context), my_body(body), my_feeder_holder(my_wait_context, my_execution_context, my_body) { my_wait_context.reserve(); 
} private: task* cancel(execution_data&) override { this->my_wait_context.release(); return nullptr; } protected: Iterator my_first; Iterator my_last; wait_context& my_wait_context; task_group_context& my_execution_context; const Body& my_body; feeder_holder my_feeder_holder; }; // class for_each_root_task_base /** parallel_for_each algorithm root task - most generic version * Splits input range to blocks @ingroup algorithms **/ template > class for_each_root_task : public for_each_root_task_base { using base_type = for_each_root_task_base; public: using base_type::base_type; private: task* execute(execution_data& ed) override { using block_handling_type = input_block_handling_task; if (this->my_first == this->my_last) { this->my_wait_context.release(); return nullptr; } this->my_wait_context.reserve(); small_object_allocator alloc{}; auto block_handling_task = alloc.new_object(ed, this->my_wait_context, this->my_execution_context, this->my_body, this->my_feeder_holder.feeder_ptr(), alloc); auto* block_iterator = block_handling_task->block_iteration_space.begin(); for (; !(this->my_first == this->my_last) && block_handling_task->my_size < block_handling_type::max_block_size; ++this->my_first) { // Move semantics are automatically used when supported by the iterator new (block_iterator++) Item(*this->my_first); ++block_handling_task->my_size; } // Do not access this after spawn to avoid races spawn(*this, this->my_execution_context); return block_handling_task; } }; // class for_each_root_task - most generic implementation /** parallel_for_each algorithm root task - forward iterator based specialization * Splits input range to blocks @ingroup algorithms **/ template class for_each_root_task : public for_each_root_task_base { using base_type = for_each_root_task_base; public: using base_type::base_type; private: task* execute(execution_data& ed) override { using block_handling_type = forward_block_handling_task; if (this->my_first == this->my_last) { 
this->my_wait_context.release(); return nullptr; } std::size_t block_size{0}; Iterator first_block_element = this->my_first; for (; !(this->my_first == this->my_last) && block_size < block_handling_type::max_block_size; ++this->my_first) { ++block_size; } this->my_wait_context.reserve(); small_object_allocator alloc{}; auto block_handling_task = alloc.new_object(ed, first_block_element, block_size, this->my_wait_context, this->my_execution_context, this->my_body, this->my_feeder_holder.feeder_ptr(), alloc); // Do not access this after spawn to avoid races spawn(*this, this->my_execution_context); return block_handling_task; } }; // class for_each_root_task - forward iterator based specialization /** parallel_for_each algorithm root task - random access iterator based specialization * Splits input range to blocks @ingroup algorithms **/ template class for_each_root_task : public for_each_root_task_base { using base_type = for_each_root_task_base; public: using base_type::base_type; private: task* execute(execution_data&) override { tbb::parallel_for( tbb::blocked_range(0, std::distance(this->my_first, this->my_last)), parallel_for_body_wrapper(this->my_first, this->my_body, this->my_feeder_holder.feeder_ptr()) , this->my_execution_context ); this->my_wait_context.release(); return nullptr; } }; // class for_each_root_task - random access iterator based specialization /** Helper for getting item type. If item type can be deduced from feeder - got it from feeder, if feeder is generic - got item type from range. 
@ingroup algorithms */ template auto feeder_argument_parser(void (Body::*)(Item, feeder&) const) -> FeederArg; template decltype(feeder_argument_parser(&Body::operator())) get_item_type_impl(int); // for (T, feeder) template Item get_item_type_impl(...); // stub template using get_item_type = decltype(get_item_type_impl(0)); #if __TBB_CPP20_CONCEPTS_PRESENT template using feeder_item_type = std::remove_cvref_t>; template concept parallel_for_each_iterator_body = parallel_for_each_body, feeder_item_type>>; template concept parallel_for_each_range_body = parallel_for_each_body, feeder_item_type>>; #endif /** Implements parallel iteration over a range. @ingroup algorithms */ template void run_parallel_for_each( Iterator first, Iterator last, const Body& body, task_group_context& context) { if (!(first == last)) { using ItemType = get_item_type::value_type>; wait_context w_context(0); for_each_root_task root_task(first, last, body, w_context, context); execute_and_wait(root_task, context, w_context, context); } } /** \page parallel_for_each_body_req Requirements on parallel_for_each body Class \c Body implementing the concept of parallel_for_each body must define: - \code B::operator()( cv_item_type item, feeder& feeder ) const OR B::operator()( cv_item_type& item ) const \endcode Process item. May be invoked concurrently for the same \c this but different \c item. - \code item_type( const item_type& ) \endcode Copy a work item. - \code ~item_type() \endcode Destroy a work item **/ /** \name parallel_for_each See also requirements on \ref parallel_for_each_body_req "parallel_for_each Body". **/ //@{ //! Parallel iteration over a range, with optional addition of more work. 
/** @ingroup algorithms */ template __TBB_requires(std::input_iterator && parallel_for_each_iterator_body) void parallel_for_each(Iterator first, Iterator last, const Body& body) { task_group_context context(PARALLEL_FOR_EACH); run_parallel_for_each(first, last, body, context); } template __TBB_requires(container_based_sequence && parallel_for_each_range_body) void parallel_for_each(Range& rng, const Body& body) { parallel_for_each(std::begin(rng), std::end(rng), body); } template __TBB_requires(container_based_sequence && parallel_for_each_range_body) void parallel_for_each(const Range& rng, const Body& body) { parallel_for_each(std::begin(rng), std::end(rng), body); } //! Parallel iteration over a range, with optional addition of more work and user-supplied context /** @ingroup algorithms */ template __TBB_requires(std::input_iterator && parallel_for_each_iterator_body) void parallel_for_each(Iterator first, Iterator last, const Body& body, task_group_context& context) { run_parallel_for_each(first, last, body, context); } template __TBB_requires(container_based_sequence && parallel_for_each_range_body) void parallel_for_each(Range& rng, const Body& body, task_group_context& context) { parallel_for_each(std::begin(rng), std::end(rng), body, context); } template __TBB_requires(container_based_sequence && parallel_for_each_range_body) void parallel_for_each(const Range& rng, const Body& body, task_group_context& context) { parallel_for_each(std::begin(rng), std::end(rng), body, context); } } // namespace d2 } // namespace detail //! 
@endcond //@} inline namespace v1 { using detail::d2::parallel_for_each; using detail::d1::feeder; } // namespace v1 } // namespace tbb #endif /* __TBB_parallel_for_each_H */ level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/parallel_invoke.h000066400000000000000000000171471514453371700321320ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_parallel_invoke_H #define __TBB_parallel_invoke_H #include "detail/_config.h" #include "detail/_namespace_injection.h" #include "detail/_exception.h" #include "detail/_task.h" #include "detail/_template_helpers.h" #include "detail/_small_object_pool.h" #include "task_group.h" #include #include #include namespace tbb { namespace detail { namespace d1 { //! Simple task object, executing user method template struct function_invoker : public task { function_invoker(const Function& function, WaitObject& wait_ctx) : my_function(function), parent_wait_ctx(wait_ctx) {} task* execute(execution_data& ed) override { my_function(); parent_wait_ctx.release(ed); call_itt_task_notify(destroy, this); return nullptr; } task* cancel(execution_data& ed) override { parent_wait_ctx.release(ed); return nullptr; } const Function& my_function; WaitObject& parent_wait_ctx; }; // struct function_invoker //! Task object for managing subroots in trinary task trees. 
// Endowed with additional synchronization logic (compatible with wait object intefaces) to support // continuation passing execution. This task spawns 2 function_invoker tasks with first and second functors // and then executes first functor by itself. But only the last executed functor must destruct and deallocate // the subroot task. template struct invoke_subroot_task : public task { wait_context& root_wait_ctx; std::atomic ref_count{0}; bool child_spawned = false; const F1& self_invoked_functor; function_invoker> f2_invoker; function_invoker> f3_invoker; task_group_context& my_execution_context; small_object_allocator my_allocator; invoke_subroot_task(const F1& f1, const F2& f2, const F3& f3, wait_context& wait_ctx, task_group_context& context, small_object_allocator& alloc) : root_wait_ctx(wait_ctx), self_invoked_functor(f1), f2_invoker(f2, *this), f3_invoker(f3, *this), my_execution_context(context), my_allocator(alloc) { root_wait_ctx.reserve(); } void finalize(const execution_data& ed) { root_wait_ctx.release(); my_allocator.delete_object(this, ed); } void release(const execution_data& ed) { __TBB_ASSERT(ref_count > 0, nullptr); call_itt_task_notify(releasing, this); if( --ref_count == 0 ) { call_itt_task_notify(acquired, this); finalize(ed); } } task* execute(execution_data& ed) override { ref_count.fetch_add(3, std::memory_order_relaxed); spawn(f3_invoker, my_execution_context); spawn(f2_invoker, my_execution_context); self_invoked_functor(); release(ed); return nullptr; } task* cancel(execution_data& ed) override { if( ref_count > 0 ) { // detect children spawn release(ed); } else { finalize(ed); } return nullptr; } }; // struct subroot_task class invoke_root_task { public: invoke_root_task(wait_context& wc) : my_wait_context(wc) {} void release(const execution_data&) { my_wait_context.release(); } private: wait_context& my_wait_context; }; template void invoke_recursive_separation(wait_context& root_wait_ctx, task_group_context& context, const F1& f1) 
{ root_wait_ctx.reserve(1); invoke_root_task root(root_wait_ctx); function_invoker invoker1(f1, root); execute_and_wait(invoker1, context, root_wait_ctx, context); } template void invoke_recursive_separation(wait_context& root_wait_ctx, task_group_context& context, const F1& f1, const F2& f2) { root_wait_ctx.reserve(2); invoke_root_task root(root_wait_ctx); function_invoker invoker1(f1, root); function_invoker invoker2(f2, root); spawn(invoker1, context); execute_and_wait(invoker2, context, root_wait_ctx, context); } template void invoke_recursive_separation(wait_context& root_wait_ctx, task_group_context& context, const F1& f1, const F2& f2, const F3& f3) { root_wait_ctx.reserve(3); invoke_root_task root(root_wait_ctx); function_invoker invoker1(f1, root); function_invoker invoker2(f2, root); function_invoker invoker3(f3, root); //TODO: implement sub root for two tasks (measure performance) spawn(invoker1, context); spawn(invoker2, context); execute_and_wait(invoker3, context, root_wait_ctx, context); } template void invoke_recursive_separation(wait_context& root_wait_ctx, task_group_context& context, const F1& f1, const F2& f2, const F3& f3, const Fs&... fs) { small_object_allocator alloc{}; auto sub_root = alloc.new_object>(f1, f2, f3, root_wait_ctx, context, alloc); spawn(*sub_root, context); invoke_recursive_separation(root_wait_ctx, context, fs...); } template void parallel_invoke_impl(task_group_context& context, const Fs&... fs) { static_assert(sizeof...(Fs) >= 2, "Parallel invoke may be called with at least two callable"); wait_context root_wait_ctx{0}; invoke_recursive_separation(root_wait_ctx, context, fs...); } template void parallel_invoke_impl(const F1& f1, const Fs&... fs) { static_assert(sizeof...(Fs) >= 1, "Parallel invoke may be called with at least two callable"); task_group_context context(PARALLEL_INVOKE); wait_context root_wait_ctx{0}; invoke_recursive_separation(root_wait_ctx, context, fs..., f1); } //! 
Passes last argument of variadic pack as first for handling user provided task_group_context template struct invoke_helper; template struct invoke_helper, T, Fs...> : invoke_helper, Fs...> {}; template struct invoke_helper, T> { void operator()(Fs&&... args, T&& t) { parallel_invoke_impl(std::forward(t), std::forward(args)...); } }; //! Parallel execution of several function objects // We need to pass parameter pack through forwarding reference, // since this pack may contain task_group_context that must be passed via lvalue non-const reference template void parallel_invoke(Fs&&... fs) { invoke_helper, Fs...>()(std::forward(fs)...); } } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::parallel_invoke; } // namespace v1 } // namespace tbb #endif /* __TBB_parallel_invoke_H */ level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/parallel_pipeline.h000066400000000000000000000131731514453371700324370ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #ifndef __TBB_parallel_pipeline_H #define __TBB_parallel_pipeline_H #include "detail/_pipeline_filters.h" #include "detail/_config.h" #include "detail/_namespace_injection.h" #include "task_group.h" #include #include #include namespace tbb { namespace detail { namespace r1 { TBB_EXPORT void __TBB_EXPORTED_FUNC parallel_pipeline(task_group_context&, std::size_t, const d1::filter_node&); } namespace d1 { enum class filter_mode : unsigned int { //! processes multiple items in parallel and in no particular order parallel = base_filter::filter_is_out_of_order, //! processes items one at a time; all such filters process items in the same order serial_in_order = base_filter::filter_is_serial, //! processes items one at a time and in no particular order serial_out_of_order = base_filter::filter_is_serial | base_filter::filter_is_out_of_order }; //! Class representing a chain of type-safe pipeline filters /** @ingroup algorithms */ template class filter { filter_node_ptr my_root; filter( filter_node_ptr root ) : my_root(root) {} friend void parallel_pipeline( size_t, const filter&, task_group_context& ); template friend filter make_filter( filter_mode, const Body& ); template friend filter operator&( const filter&, const filter& ); public: filter() = default; filter( const filter& rhs ) : my_root(rhs.my_root) {} filter( filter&& rhs ) : my_root(std::move(rhs.my_root)) {} void operator=(const filter& rhs) { my_root = rhs.my_root; } void operator=( filter&& rhs ) { my_root = std::move(rhs.my_root); } template filter( filter_mode mode, const Body& body ) : my_root( new(r1::allocate_memory(sizeof(filter_node_leaf))) filter_node_leaf(static_cast(mode), body) ) { } filter& operator&=( const filter& right ) { *this = *this & right; return *this; } void clear() { // Like operator= with filter() on right side. my_root = nullptr; } }; //! 
Create a filter to participate in parallel_pipeline /** @ingroup algorithms */ template filter make_filter( filter_mode mode, const Body& body ) { return filter_node_ptr( new(r1::allocate_memory(sizeof(filter_node_leaf))) filter_node_leaf(static_cast(mode), body) ); } //! Create a filter to participate in parallel_pipeline /** @ingroup algorithms */ template filter, filter_output> make_filter( filter_mode mode, const Body& body ) { return make_filter, filter_output>(mode, body); } //! Composition of filters left and right. /** @ingroup algorithms */ template filter operator&( const filter& left, const filter& right ) { __TBB_ASSERT(left.my_root,"cannot use default-constructed filter as left argument of '&'"); __TBB_ASSERT(right.my_root,"cannot use default-constructed filter as right argument of '&'"); return filter_node_ptr( new (r1::allocate_memory(sizeof(filter_node))) filter_node(left.my_root,right.my_root) ); } #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT template filter(filter_mode, Body) ->filter, filter_output>; #endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT //! Parallel pipeline over chain of filters with user-supplied context. /** @ingroup algorithms **/ inline void parallel_pipeline(size_t max_number_of_live_tokens, const filter& filter_chain, task_group_context& context) { r1::parallel_pipeline(context, max_number_of_live_tokens, *filter_chain.my_root); } //! Parallel pipeline over chain of filters. /** @ingroup algorithms **/ inline void parallel_pipeline(size_t max_number_of_live_tokens, const filter& filter_chain) { task_group_context context; parallel_pipeline(max_number_of_live_tokens, filter_chain, context); } //! Parallel pipeline over sequence of filters. /** @ingroup algorithms **/ template void parallel_pipeline(size_t max_number_of_live_tokens, const F1& filter1, const F2& filter2, FiltersContext&&... 
filters) { parallel_pipeline(max_number_of_live_tokens, filter1 & filter2, std::forward(filters)...); } } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::parallel_pipeline; using detail::d1::filter; using detail::d1::make_filter; using detail::d1::filter_mode; using detail::d1::flow_control; } } // tbb #endif /* __TBB_parallel_pipeline_H */ level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/parallel_reduce.h000066400000000000000000001103261514453371700320770ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #ifndef __TBB_parallel_reduce_H #define __TBB_parallel_reduce_H #include #include "detail/_namespace_injection.h" #include "detail/_task.h" #include "detail/_aligned_space.h" #include "detail/_small_object_pool.h" #include "detail/_range_common.h" #include "task_group.h" // task_group_context #include "partitioner.h" #include "profiling.h" namespace tbb { namespace detail { #if __TBB_CPP20_CONCEPTS_PRESENT inline namespace d0 { template concept parallel_reduce_body = splittable && requires( Body& body, const Range& range, Body& rhs ) { body(range); body.join(rhs); }; template concept parallel_reduce_function = requires( const std::remove_reference_t& func, const Range& range, const Value& value ) { { func(range, value) } -> std::convertible_to; }; template concept parallel_reduce_combine = requires( const std::remove_reference_t& combine, const Value& lhs, const Value& rhs ) { { combine(lhs, rhs) } -> std::convertible_to; }; } // namespace d0 #endif // __TBB_CPP20_CONCEPTS_PRESENT namespace d1 { //! Tree node type for parallel_reduce. /** @ingroup algorithms */ //TODO: consider folding tree via bypass execution(instead of manual folding) // for better cancellation and critical tasks handling (performance measurements required). template struct reduction_tree_node : public tree_node { tbb::detail::aligned_space zombie_space; Body& left_body; bool has_right_zombie{false}; reduction_tree_node(node* parent, int ref_count, Body& input_left_body, small_object_allocator& alloc) : tree_node{parent, ref_count, alloc}, left_body(input_left_body) /* gcc4.8 bug - braced-initialization doesn't work for class members of reference type */ {} void join(task_group_context* context) { if (has_right_zombie && !context->is_group_execution_cancelled()) left_body.join(*zombie_space.begin()); } ~reduction_tree_node() { if( has_right_zombie ) zombie_space.begin()->~Body(); } }; //! Task type used to split the work of parallel_reduce. 
/** @ingroup algorithms */ template struct start_reduce : public task { Range my_range; Body* my_body; node* my_parent; typename Partitioner::task_partition_type my_partition; small_object_allocator my_allocator; bool is_right_child; task* execute(execution_data&) override; task* cancel(execution_data&) override; void finalize(const execution_data&); using tree_node_type = reduction_tree_node; //! Constructor reduce root task. start_reduce( const Range& range, Body& body, Partitioner& partitioner, small_object_allocator& alloc ) : my_range(range), my_body(&body), my_partition(partitioner), my_allocator(alloc), is_right_child(false) {} //! Splitting constructor used to generate children. /** parent_ becomes left child. Newly constructed object is right child. */ start_reduce( start_reduce& parent_, typename Partitioner::split_type& split_obj, small_object_allocator& alloc ) : my_range(parent_.my_range, get_range_split_object(split_obj)), my_body(parent_.my_body), my_partition(parent_.my_partition, split_obj), my_allocator(alloc), is_right_child(true) { parent_.is_right_child = false; } //! Construct right child from the given range as response to the demand. /** parent_ remains left child. Newly constructed object is right child. 
*/ start_reduce( start_reduce& parent_, const Range& r, depth_t d, small_object_allocator& alloc ) : my_range(r), my_body(parent_.my_body), my_partition(parent_.my_partition, split()), my_allocator(alloc), is_right_child(true) { my_partition.align_depth( d ); parent_.is_right_child = false; } static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) { if ( !range.empty() ) { wait_node wn; small_object_allocator alloc{}; auto reduce_task = alloc.new_object(range, body, partitioner, alloc); reduce_task->my_parent = &wn; execute_and_wait(*reduce_task, context, wn.m_wait, context); } } static void run(const Range& range, Body& body, Partitioner& partitioner) { // Bound context prevents exceptions from body to affect nesting or sibling algorithms, // and allows users to handle exceptions safely by wrapping parallel_reduce in the try-block. task_group_context context(PARALLEL_REDUCE); run(range, body, partitioner, context); } //! Run body for range, serves as callback for partitioner void run_body( Range &r ) { (*my_body)(r); } //! spawn right task, serves as callback for partitioner void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) { offer_work_impl(ed, *this, split_obj); } //! spawn right task, serves as callback for partitioner void offer_work(const Range& r, depth_t d, execution_data& ed) { offer_work_impl(ed, *this, r, d); } private: template void offer_work_impl(execution_data& ed, Args&&... args) { small_object_allocator alloc{}; // New right child auto right_child = alloc.new_object(ed, std::forward(args)..., alloc); // New root node as a continuation and ref count. Left and right child attach to the new parent. right_child->my_parent = my_parent = alloc.new_object(ed, my_parent, 2, *my_body, alloc); // Spawn the right sibling right_child->spawn_self(ed); } void spawn_self(execution_data& ed) { my_partition.spawn_task(*this, *context(ed)); } }; //! 
fold the tree and deallocate the task template void start_reduce::finalize(const execution_data& ed) { // Get the current parent and wait object before an object destruction node* parent = my_parent; auto allocator = my_allocator; // Task execution finished - destroy it this->~start_reduce(); // Unwind the tree decrementing the parent`s reference count fold_tree(parent, ed); allocator.deallocate(this, ed); } //! Execute parallel_reduce task template task* start_reduce::execute(execution_data& ed) { if (!is_same_affinity(ed)) { my_partition.note_affinity(execution_slot(ed)); } my_partition.check_being_stolen(*this, ed); // The acquire barrier synchronizes the data pointed with my_body if the left // task has already finished. if( is_right_child && my_parent->m_ref_count.load(std::memory_order_acquire) == 2 ) { tree_node_type* parent_ptr = static_cast(my_parent); my_body = (Body*) new( parent_ptr->zombie_space.begin() ) Body(*my_body, split()); parent_ptr->has_right_zombie = true; } __TBB_ASSERT(my_body != nullptr, "Incorrect body value"); my_partition.execute(*this, my_range, ed); finalize(ed); return nullptr; } //! Cancel parallel_reduce task template task* start_reduce::cancel(execution_data& ed) { finalize(ed); return nullptr; } //! Tree node type for parallel_deterministic_reduce. /** @ingroup algorithms */ template struct deterministic_reduction_tree_node : public tree_node { Body right_body; Body& left_body; deterministic_reduction_tree_node(node* parent, int ref_count, Body& input_left_body, small_object_allocator& alloc) : tree_node{parent, ref_count, alloc}, right_body{input_left_body, detail::split()}, left_body(input_left_body) {} void join(task_group_context* context) { if (!context->is_group_execution_cancelled()) left_body.join(right_body); } }; //! Task type used to split the work of parallel_deterministic_reduce. 
/** @ingroup algorithms */ template struct start_deterministic_reduce : public task { Range my_range; Body& my_body; node* my_parent; typename Partitioner::task_partition_type my_partition; small_object_allocator my_allocator; task* execute(execution_data&) override; task* cancel(execution_data&) override; void finalize(const execution_data&); using tree_node_type = deterministic_reduction_tree_node; //! Constructor deterministic_reduce root task. start_deterministic_reduce( const Range& range, Partitioner& partitioner, Body& body, small_object_allocator& alloc ) : my_range(range), my_body(body), my_partition(partitioner), my_allocator(alloc) {} //! Splitting constructor used to generate children. /** parent_ becomes left child. Newly constructed object is right child. */ start_deterministic_reduce( start_deterministic_reduce& parent_, typename Partitioner::split_type& split_obj, Body& body, small_object_allocator& alloc ) : my_range(parent_.my_range, get_range_split_object(split_obj)), my_body(body), my_partition(parent_.my_partition, split_obj), my_allocator(alloc) {} static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) { if ( !range.empty() ) { wait_node wn; small_object_allocator alloc{}; auto deterministic_reduce_task = alloc.new_object(range, partitioner, body, alloc); deterministic_reduce_task->my_parent = &wn; execute_and_wait(*deterministic_reduce_task, context, wn.m_wait, context); } } static void run(const Range& range, Body& body, Partitioner& partitioner) { // Bound context prevents exceptions from body to affect nesting or sibling algorithms, // and allows users to handle exceptions safely by wrapping parallel_deterministic_reduce // in the try-block. task_group_context context(PARALLEL_REDUCE); run(range, body, partitioner, context); } //! Run body for range, serves as callback for partitioner void run_body( Range &r ) { my_body( r ); } //! 
Spawn right task, serves as callback for partitioner void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) { offer_work_impl(ed, *this, split_obj); } private: template void offer_work_impl(execution_data& ed, Args&&... args) { small_object_allocator alloc{}; // New root node as a continuation and ref count. Left and right child attach to the new parent. Split the body. auto new_tree_node = alloc.new_object(ed, my_parent, 2, my_body, alloc); // New right child auto right_child = alloc.new_object(ed, std::forward(args)..., new_tree_node->right_body, alloc); right_child->my_parent = my_parent = new_tree_node; // Spawn the right sibling right_child->spawn_self(ed); } void spawn_self(execution_data& ed) { my_partition.spawn_task(*this, *context(ed)); } }; //! Fold the tree and deallocate the task template void start_deterministic_reduce::finalize(const execution_data& ed) { // Get the current parent and wait object before an object destruction node* parent = my_parent; auto allocator = my_allocator; // Task execution finished - destroy it this->~start_deterministic_reduce(); // Unwind the tree decrementing the parent`s reference count fold_tree(parent, ed); allocator.deallocate(this, ed); } //! Execute parallel_deterministic_reduce task template task* start_deterministic_reduce::execute(execution_data& ed) { if (!is_same_affinity(ed)) { my_partition.note_affinity(execution_slot(ed)); } my_partition.check_being_stolen(*this, ed); my_partition.execute(*this, my_range, ed); finalize(ed); return NULL; } //! Cancel parallel_deterministic_reduce task template task* start_deterministic_reduce::cancel(execution_data& ed) { finalize(ed); return NULL; } //! Auxiliary class for parallel_reduce; for internal use only. /** The adaptor class that implements \ref parallel_reduce_body_req "parallel_reduce Body" using given \ref parallel_reduce_lambda_req "anonymous function objects". 
**/ /** @ingroup algorithms */ template class lambda_reduce_body { //TODO: decide if my_real_body, my_reduction, and my_identity_element should be copied or referenced // (might require some performance measurements) const Value& my_identity_element; const RealBody& my_real_body; const Reduction& my_reduction; Value my_value; lambda_reduce_body& operator= ( const lambda_reduce_body& other ); public: lambda_reduce_body( const Value& identity, const RealBody& body, const Reduction& reduction ) : my_identity_element(identity) , my_real_body(body) , my_reduction(reduction) , my_value(identity) { } lambda_reduce_body( const lambda_reduce_body& other ) = default; lambda_reduce_body( lambda_reduce_body& other, tbb::split ) : my_identity_element(other.my_identity_element) , my_real_body(other.my_real_body) , my_reduction(other.my_reduction) , my_value(other.my_identity_element) { } void operator()(Range& range) { my_value = my_real_body(range, const_cast(my_value)); } void join( lambda_reduce_body& rhs ) { my_value = my_reduction(const_cast(my_value), const_cast(rhs.my_value)); } Value result() const { return my_value; } }; // Requirements on Range concept are documented in blocked_range.h /** \page parallel_reduce_body_req Requirements on parallel_reduce body Class \c Body implementing the concept of parallel_reduce body must define: - \code Body::Body( Body&, split ); \endcode Splitting constructor. Must be able to run concurrently with operator() and method \c join - \code Body::~Body(); \endcode Destructor - \code void Body::operator()( Range& r ); \endcode Function call operator applying body to range \c r and accumulating the result - \code void Body::join( Body& b ); \endcode Join results. 
The result in \c b should be merged into the result of \c this **/ /** \page parallel_reduce_lambda_req Requirements on parallel_reduce anonymous function objects (lambda functions) TO BE DOCUMENTED **/ /** \name parallel_reduce See also requirements on \ref range_req "Range" and \ref parallel_reduce_body_req "parallel_reduce Body". **/ //@{ //! Parallel iteration with reduction and default partitioner. /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_reduce_body) void parallel_reduce( const Range& range, Body& body ) { start_reduce::run( range, body, __TBB_DEFAULT_PARTITIONER() ); } //! Parallel iteration with reduction and simple_partitioner /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_reduce_body) void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) { start_reduce::run( range, body, partitioner ); } //! Parallel iteration with reduction and auto_partitioner /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_reduce_body) void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner ) { start_reduce::run( range, body, partitioner ); } //! Parallel iteration with reduction and static_partitioner /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_reduce_body) void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) { start_reduce::run( range, body, partitioner ); } //! Parallel iteration with reduction and affinity_partitioner /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_reduce_body) void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner ) { start_reduce::run( range, body, partitioner ); } //! Parallel iteration with reduction, default partitioner and user-supplied context. 
/** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_reduce_body) void parallel_reduce( const Range& range, Body& body, task_group_context& context ) { start_reduce::run( range, body, __TBB_DEFAULT_PARTITIONER(), context ); } //! Parallel iteration with reduction, simple partitioner and user-supplied context. /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_reduce_body) void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) { start_reduce::run( range, body, partitioner, context ); } //! Parallel iteration with reduction, auto_partitioner and user-supplied context /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_reduce_body) void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner, task_group_context& context ) { start_reduce::run( range, body, partitioner, context ); } //! Parallel iteration with reduction, static_partitioner and user-supplied context /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_reduce_body) void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) { start_reduce::run( range, body, partitioner, context ); } //! Parallel iteration with reduction, affinity_partitioner and user-supplied context /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_reduce_body) void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) { start_reduce::run( range, body, partitioner, context ); } /** parallel_reduce overloads that work with anonymous function objects (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/ //! Parallel iteration with reduction and default partitioner. 
/** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_reduce_function && parallel_reduce_combine) Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) { lambda_reduce_body body(identity, real_body, reduction); start_reduce,const __TBB_DEFAULT_PARTITIONER> ::run(range, body, __TBB_DEFAULT_PARTITIONER() ); return body.result(); } //! Parallel iteration with reduction and simple_partitioner. /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_reduce_function && parallel_reduce_combine) Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const simple_partitioner& partitioner ) { lambda_reduce_body body(identity, real_body, reduction); start_reduce,const simple_partitioner> ::run(range, body, partitioner ); return body.result(); } //! Parallel iteration with reduction and auto_partitioner /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_reduce_function && parallel_reduce_combine) Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const auto_partitioner& partitioner ) { lambda_reduce_body body(identity, real_body, reduction); start_reduce,const auto_partitioner> ::run( range, body, partitioner ); return body.result(); } //! Parallel iteration with reduction and static_partitioner /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_reduce_function && parallel_reduce_combine) Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const static_partitioner& partitioner ) { lambda_reduce_body body(identity, real_body, reduction); start_reduce,const static_partitioner> ::run( range, body, partitioner ); return body.result(); } //! 
Parallel iteration with reduction and affinity_partitioner /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_reduce_function && parallel_reduce_combine) Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, affinity_partitioner& partitioner ) { lambda_reduce_body body(identity, real_body, reduction); start_reduce,affinity_partitioner> ::run( range, body, partitioner ); return body.result(); } //! Parallel iteration with reduction, default partitioner and user-supplied context. /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_reduce_function && parallel_reduce_combine) Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, task_group_context& context ) { lambda_reduce_body body(identity, real_body, reduction); start_reduce,const __TBB_DEFAULT_PARTITIONER> ::run( range, body, __TBB_DEFAULT_PARTITIONER(), context ); return body.result(); } //! Parallel iteration with reduction, simple partitioner and user-supplied context. /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_reduce_function && parallel_reduce_combine) Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const simple_partitioner& partitioner, task_group_context& context ) { lambda_reduce_body body(identity, real_body, reduction); start_reduce,const simple_partitioner> ::run( range, body, partitioner, context ); return body.result(); } //! 
Parallel iteration with reduction, auto_partitioner and user-supplied context /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_reduce_function && parallel_reduce_combine) Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const auto_partitioner& partitioner, task_group_context& context ) { lambda_reduce_body body(identity, real_body, reduction); start_reduce,const auto_partitioner> ::run( range, body, partitioner, context ); return body.result(); } //! Parallel iteration with reduction, static_partitioner and user-supplied context /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_reduce_function && parallel_reduce_combine) Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const static_partitioner& partitioner, task_group_context& context ) { lambda_reduce_body body(identity, real_body, reduction); start_reduce,const static_partitioner> ::run( range, body, partitioner, context ); return body.result(); } //! Parallel iteration with reduction, affinity_partitioner and user-supplied context /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_reduce_function && parallel_reduce_combine) Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, affinity_partitioner& partitioner, task_group_context& context ) { lambda_reduce_body body(identity, real_body, reduction); start_reduce,affinity_partitioner> ::run( range, body, partitioner, context ); return body.result(); } //! Parallel iteration with deterministic reduction and default simple partitioner. /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_reduce_body) void parallel_deterministic_reduce( const Range& range, Body& body ) { start_deterministic_reduce::run(range, body, simple_partitioner()); } //! 
Parallel iteration with deterministic reduction and simple partitioner. /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_reduce_body) void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) { start_deterministic_reduce::run(range, body, partitioner); } //! Parallel iteration with deterministic reduction and static partitioner. /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_reduce_body) void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) { start_deterministic_reduce::run(range, body, partitioner); } //! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context. /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_reduce_body) void parallel_deterministic_reduce( const Range& range, Body& body, task_group_context& context ) { start_deterministic_reduce::run( range, body, simple_partitioner(), context ); } //! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context. /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_reduce_body) void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) { start_deterministic_reduce::run(range, body, partitioner, context); } //! Parallel iteration with deterministic reduction, static partitioner and user-supplied context. 
/** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_reduce_body) void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) { start_deterministic_reduce::run(range, body, partitioner, context); } /** parallel_reduce overloads that work with anonymous function objects (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/ //! Parallel iteration with deterministic reduction and default simple partitioner. // TODO: consider making static_partitioner the default /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_reduce_function && parallel_reduce_combine) Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) { return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner()); } //! Parallel iteration with deterministic reduction and simple partitioner. /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_reduce_function && parallel_reduce_combine) Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const simple_partitioner& partitioner ) { lambda_reduce_body body(identity, real_body, reduction); start_deterministic_reduce, const simple_partitioner> ::run(range, body, partitioner); return body.result(); } //! Parallel iteration with deterministic reduction and static partitioner. 
/** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_reduce_function && parallel_reduce_combine) Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const static_partitioner& partitioner ) { lambda_reduce_body body(identity, real_body, reduction); start_deterministic_reduce, const static_partitioner> ::run(range, body, partitioner); return body.result(); } //! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context. /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_reduce_function && parallel_reduce_combine) Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, task_group_context& context ) { return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner(), context); } //! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context. /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_reduce_function && parallel_reduce_combine) Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const simple_partitioner& partitioner, task_group_context& context ) { lambda_reduce_body body(identity, real_body, reduction); start_deterministic_reduce, const simple_partitioner> ::run(range, body, partitioner, context); return body.result(); } //! Parallel iteration with deterministic reduction, static partitioner and user-supplied context. 
/** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_reduce_function && parallel_reduce_combine) Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const static_partitioner& partitioner, task_group_context& context ) { lambda_reduce_body body(identity, real_body, reduction); start_deterministic_reduce, const static_partitioner> ::run(range, body, partitioner, context); return body.result(); } //@} } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::parallel_reduce; using detail::d1::parallel_deterministic_reduce; // Split types using detail::split; using detail::proportional_split; } // namespace v1 } // namespace tbb #endif /* __TBB_parallel_reduce_H */ level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/parallel_scan.h000066400000000000000000000544321514453371700315610ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_parallel_scan_H #define __TBB_parallel_scan_H #include #include "detail/_config.h" #include "detail/_namespace_injection.h" #include "detail/_exception.h" #include "detail/_task.h" #include "profiling.h" #include "partitioner.h" #include "blocked_range.h" #include "task_group.h" namespace tbb { namespace detail { namespace d1 { //! Used to indicate that the initial scan is being performed. 
/** @ingroup algorithms */ struct pre_scan_tag { static bool is_final_scan() {return false;} operator bool() {return is_final_scan();} }; //! Used to indicate that the final scan is being performed. /** @ingroup algorithms */ struct final_scan_tag { static bool is_final_scan() {return true;} operator bool() {return is_final_scan();} }; template struct sum_node; #if __TBB_CPP20_CONCEPTS_PRESENT } // namespace d1 namespace d0 { template concept parallel_scan_body = splittable && requires( Body& body, const Range& range, Body& other ) { body(range, tbb::detail::d1::pre_scan_tag{}); body(range, tbb::detail::d1::final_scan_tag{}); body.reverse_join(other); body.assign(other); }; template concept parallel_scan_function = requires( const std::remove_reference_t& func, const Range& range, const Value& value ) { { func(range, value, true) } -> std::convertible_to; }; template concept parallel_scan_combine = requires( const std::remove_reference_t& combine, const Value& lhs, const Value& rhs ) { { combine(lhs, rhs) } -> std::convertible_to; }; } // namespace d0 namespace d1 { #endif // __TBB_CPP20_CONCEPTS_PRESENT //! Performs final scan for a leaf /** @ingroup algorithms */ template struct final_sum : public task { private: using sum_node_type = sum_node; Body m_body; aligned_space m_range; //! Where to put result of last subrange, or nullptr if not last subrange. 
Body* m_stuff_last; wait_context& m_wait_context; sum_node_type* m_parent = nullptr; public: small_object_allocator m_allocator; final_sum( Body& body, wait_context& w_o, small_object_allocator& alloc ) : m_body(body, split()), m_wait_context(w_o), m_allocator(alloc) { poison_pointer(m_stuff_last); } final_sum( final_sum& sum, small_object_allocator& alloc ) : m_body(sum.m_body, split()), m_wait_context(sum.m_wait_context), m_allocator(alloc) { poison_pointer(m_stuff_last); } ~final_sum() { m_range.begin()->~Range(); } void finish_construction( sum_node_type* parent, const Range& range, Body* stuff_last ) { __TBB_ASSERT( m_parent == nullptr, nullptr ); m_parent = parent; new( m_range.begin() ) Range(range); m_stuff_last = stuff_last; } private: sum_node_type* release_parent() { call_itt_task_notify(releasing, m_parent); if (m_parent) { auto parent = m_parent; m_parent = nullptr; if (parent->ref_count.fetch_sub(1) == 1) { return parent; } } else m_wait_context.release(); return nullptr; } sum_node_type* finalize(const execution_data& ed){ sum_node_type* next_task = release_parent(); m_allocator.delete_object(this, ed); return next_task; } public: task* execute(execution_data& ed) override { m_body( *m_range.begin(), final_scan_tag() ); if( m_stuff_last ) m_stuff_last->assign(m_body); return finalize(ed); } task* cancel(execution_data& ed) override { return finalize(ed); } template void operator()( const Range& r, Tag tag ) { m_body( r, tag ); } void reverse_join( final_sum& a ) { m_body.reverse_join(a.m_body); } void reverse_join( Body& body ) { m_body.reverse_join(body); } void assign_to( Body& body ) { body.assign(m_body); } void self_destroy(const execution_data& ed) { m_allocator.delete_object(this, ed); } }; //! Split work to be done in the scan. 
/** @ingroup algorithms */ template struct sum_node : public task { private: using final_sum_type = final_sum; public: final_sum_type *m_incoming; final_sum_type *m_body; Body *m_stuff_last; private: final_sum_type *m_left_sum; sum_node *m_left; sum_node *m_right; bool m_left_is_final; Range m_range; wait_context& m_wait_context; sum_node* m_parent; small_object_allocator m_allocator; public: std::atomic ref_count{0}; sum_node( const Range range, bool left_is_final_, sum_node* parent, wait_context& w_o, small_object_allocator& alloc ) : m_stuff_last(nullptr), m_left_sum(nullptr), m_left(nullptr), m_right(nullptr), m_left_is_final(left_is_final_), m_range(range), m_wait_context(w_o), m_parent(parent), m_allocator(alloc) { if( m_parent ) m_parent->ref_count.fetch_add(1); // Poison fields that will be set by second pass. poison_pointer(m_body); poison_pointer(m_incoming); } ~sum_node() { if (m_parent) m_parent->ref_count.fetch_sub(1); } private: sum_node* release_parent() { call_itt_task_notify(releasing, m_parent); if (m_parent) { auto parent = m_parent; m_parent = nullptr; if (parent->ref_count.fetch_sub(1) == 1) { return parent; } } else m_wait_context.release(); return nullptr; } task* create_child( const Range& range, final_sum_type& body, sum_node* child, final_sum_type* incoming, Body* stuff_last ) { if( child ) { __TBB_ASSERT( is_poisoned(child->m_body) && is_poisoned(child->m_incoming), nullptr ); child->prepare_for_execution(body, incoming, stuff_last); return child; } else { body.finish_construction(this, range, stuff_last); return &body; } } sum_node* finalize(const execution_data& ed) { sum_node* next_task = release_parent(); m_allocator.delete_object(this, ed); return next_task; } public: void prepare_for_execution(final_sum_type& body, final_sum_type* incoming, Body *stuff_last) { this->m_body = &body; this->m_incoming = incoming; this->m_stuff_last = stuff_last; } task* execute(execution_data& ed) override { if( m_body ) { if( m_incoming ) 
m_left_sum->reverse_join( *m_incoming ); task* right_child = this->create_child(Range(m_range,split()), *m_left_sum, m_right, m_left_sum, m_stuff_last); task* left_child = m_left_is_final ? nullptr : this->create_child(m_range, *m_body, m_left, m_incoming, nullptr); ref_count = (left_child != nullptr) + (right_child != nullptr); m_body = nullptr; if( left_child ) { spawn(*right_child, *ed.context); return left_child; } else { return right_child; } } else { return finalize(ed); } } task* cancel(execution_data& ed) override { return finalize(ed); } void self_destroy(const execution_data& ed) { m_allocator.delete_object(this, ed); } template friend struct start_scan; template friend struct finish_scan; }; //! Combine partial results /** @ingroup algorithms */ template struct finish_scan : public task { private: using sum_node_type = sum_node; using final_sum_type = final_sum; final_sum_type** const m_sum_slot; sum_node_type*& m_return_slot; small_object_allocator m_allocator; public: std::atomic m_right_zombie; sum_node_type& m_result; std::atomic ref_count{2}; finish_scan* m_parent; wait_context& m_wait_context; task* execute(execution_data& ed) override { __TBB_ASSERT( m_result.ref_count.load() == static_cast((m_result.m_left!=nullptr)+(m_result.m_right!=nullptr)), nullptr ); if( m_result.m_left ) m_result.m_left_is_final = false; final_sum_type* right_zombie = m_right_zombie.load(std::memory_order_acquire); if( right_zombie && m_sum_slot ) (*m_sum_slot)->reverse_join(*m_result.m_left_sum); __TBB_ASSERT( !m_return_slot, nullptr ); if( right_zombie || m_result.m_right ) { m_return_slot = &m_result; } else { m_result.self_destroy(ed); } if( right_zombie && !m_sum_slot && !m_result.m_right ) { right_zombie->self_destroy(ed); m_right_zombie.store(nullptr, std::memory_order_relaxed); } return finalize(ed); } task* cancel(execution_data& ed) override { return finalize(ed); } finish_scan(sum_node_type*& return_slot, final_sum_type** sum, sum_node_type& result_, 
finish_scan* parent, wait_context& w_o, small_object_allocator& alloc) : m_sum_slot(sum), m_return_slot(return_slot), m_allocator(alloc), m_right_zombie(nullptr), m_result(result_), m_parent(parent), m_wait_context(w_o) { __TBB_ASSERT( !m_return_slot, nullptr ); } private: finish_scan* release_parent() { call_itt_task_notify(releasing, m_parent); if (m_parent) { auto parent = m_parent; m_parent = nullptr; if (parent->ref_count.fetch_sub(1) == 1) { return parent; } } else m_wait_context.release(); return nullptr; } finish_scan* finalize(const execution_data& ed) { finish_scan* next_task = release_parent(); m_allocator.delete_object(this, ed); return next_task; } }; //! Initial task to split the work /** @ingroup algorithms */ template struct start_scan : public task { private: using sum_node_type = sum_node; using final_sum_type = final_sum; using finish_pass1_type = finish_scan; std::reference_wrapper m_return_slot; Range m_range; std::reference_wrapper m_body; typename Partitioner::partition_type m_partition; /** Non-null if caller is requesting total. 
*/ final_sum_type** m_sum_slot; bool m_is_final; bool m_is_right_child; finish_pass1_type* m_parent; small_object_allocator m_allocator; wait_context& m_wait_context; finish_pass1_type* release_parent() { call_itt_task_notify(releasing, m_parent); if (m_parent) { auto parent = m_parent; m_parent = nullptr; if (parent->ref_count.fetch_sub(1) == 1) { return parent; } } else m_wait_context.release(); return nullptr; } finish_pass1_type* finalize( const execution_data& ed ) { finish_pass1_type* next_task = release_parent(); m_allocator.delete_object(this, ed); return next_task; } public: task* execute( execution_data& ) override; task* cancel( execution_data& ed ) override { return finalize(ed); } start_scan( sum_node_type*& return_slot, start_scan& parent, small_object_allocator& alloc ) : m_return_slot(return_slot), m_range(parent.m_range,split()), m_body(parent.m_body), m_partition(parent.m_partition,split()), m_sum_slot(parent.m_sum_slot), m_is_final(parent.m_is_final), m_is_right_child(true), m_parent(parent.m_parent), m_allocator(alloc), m_wait_context(parent.m_wait_context) { __TBB_ASSERT( !m_return_slot, nullptr ); parent.m_is_right_child = false; } start_scan( sum_node_type*& return_slot, const Range& range, final_sum_type& body, const Partitioner& partitioner, wait_context& w_o, small_object_allocator& alloc ) : m_return_slot(return_slot), m_range(range), m_body(body), m_partition(partitioner), m_sum_slot(nullptr), m_is_final(true), m_is_right_child(false), m_parent(nullptr), m_allocator(alloc), m_wait_context(w_o) { __TBB_ASSERT( !m_return_slot, nullptr ); } static void run( const Range& range, Body& body, const Partitioner& partitioner ) { if( !range.empty() ) { task_group_context context(PARALLEL_SCAN); using start_pass1_type = start_scan; sum_node_type* root = nullptr; wait_context w_ctx{1}; small_object_allocator alloc{}; auto& temp_body = *alloc.new_object(body, w_ctx, alloc); temp_body.reverse_join(body); auto& pass1 = 
*alloc.new_object(/*m_return_slot=*/root, range, temp_body, partitioner, w_ctx, alloc); execute_and_wait(pass1, context, w_ctx, context); if( root ) { root->prepare_for_execution(temp_body, nullptr, &body); w_ctx.reserve(); execute_and_wait(*root, context, w_ctx, context); } else { temp_body.assign_to(body); temp_body.finish_construction(nullptr, range, nullptr); alloc.delete_object(&temp_body); } } } }; template task* start_scan::execute( execution_data& ed ) { // Inspecting m_parent->result.left_sum would ordinarily be a race condition. // But we inspect it only if we are not a stolen task, in which case we // know that task assigning to m_parent->result.left_sum has completed. __TBB_ASSERT(!m_is_right_child || m_parent, "right child is never an orphan"); bool treat_as_stolen = m_is_right_child && (is_stolen(ed) || &m_body.get()!=m_parent->m_result.m_left_sum); if( treat_as_stolen ) { // Invocation is for right child that has been really stolen or needs to be virtually stolen small_object_allocator alloc{}; final_sum_type* right_zombie = alloc.new_object(m_body, alloc); m_parent->m_right_zombie.store(right_zombie, std::memory_order_release); m_body = *right_zombie; m_is_final = false; } task* next_task = nullptr; if( (m_is_right_child && !treat_as_stolen) || !m_range.is_divisible() || m_partition.should_execute_range(ed) ) { if( m_is_final ) m_body(m_range, final_scan_tag()); else if( m_sum_slot ) m_body(m_range, pre_scan_tag()); if( m_sum_slot ) *m_sum_slot = &m_body.get(); __TBB_ASSERT( !m_return_slot, nullptr ); next_task = finalize(ed); } else { small_object_allocator alloc{}; auto result = alloc.new_object(m_range,/*m_left_is_final=*/m_is_final, m_parent? 
&m_parent->m_result: nullptr, m_wait_context, alloc); auto new_parent = alloc.new_object(m_return_slot, m_sum_slot, *result, m_parent, m_wait_context, alloc); m_parent = new_parent; // Split off right child auto& right_child = *alloc.new_object(/*m_return_slot=*/result->m_right, *this, alloc); spawn(right_child, *ed.context); m_sum_slot = &result->m_left_sum; m_return_slot = result->m_left; __TBB_ASSERT( !m_return_slot, nullptr ); next_task = this; } return next_task; } template class lambda_scan_body { Value m_sum_slot; const Value& identity_element; const Scan& m_scan; const ReverseJoin& m_reverse_join; public: void operator=(const lambda_scan_body&) = delete; lambda_scan_body(const lambda_scan_body&) = default; lambda_scan_body( const Value& identity, const Scan& scan, const ReverseJoin& rev_join ) : m_sum_slot(identity) , identity_element(identity) , m_scan(scan) , m_reverse_join(rev_join) {} lambda_scan_body( lambda_scan_body& b, split ) : m_sum_slot(b.identity_element) , identity_element(b.identity_element) , m_scan(b.m_scan) , m_reverse_join(b.m_reverse_join) {} template void operator()( const Range& r, Tag tag ) { m_sum_slot = m_scan(r, m_sum_slot, tag); } void reverse_join( lambda_scan_body& a ) { m_sum_slot = m_reverse_join(a.m_sum_slot, m_sum_slot); } void assign( lambda_scan_body& b ) { m_sum_slot = b.m_sum_slot; } Value result() const { return m_sum_slot; } }; // Requirements on Range concept are documented in blocked_range.h /** \page parallel_scan_body_req Requirements on parallel_scan body Class \c Body implementing the concept of parallel_scan body must define: - \code Body::Body( Body&, split ); \endcode Splitting constructor. 
Split \c b so that \c this and \c b can accumulate separately - \code Body::~Body(); \endcode Destructor - \code void Body::operator()( const Range& r, pre_scan_tag ); \endcode Preprocess iterations for range \c r - \code void Body::operator()( const Range& r, final_scan_tag ); \endcode Do final processing for iterations of range \c r - \code void Body::reverse_join( Body& a ); \endcode Merge preprocessing state of \c a into \c this, where \c a was created earlier from \c b by b's splitting constructor **/ /** \name parallel_scan See also requirements on \ref range_req "Range" and \ref parallel_scan_body_req "parallel_scan Body". **/ //@{ //! Parallel prefix with default partitioner /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_scan_body) void parallel_scan( const Range& range, Body& body ) { start_scan::run(range,body,__TBB_DEFAULT_PARTITIONER()); } //! Parallel prefix with simple_partitioner /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_scan_body) void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) { start_scan::run(range, body, partitioner); } //! Parallel prefix with auto_partitioner /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_scan_body) void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) { start_scan::run(range, body, partitioner); } //! Parallel prefix with default partitioner /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_scan_function && parallel_scan_combine) Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join ) { lambda_scan_body body(identity, scan, reverse_join); parallel_scan(range, body, __TBB_DEFAULT_PARTITIONER()); return body.result(); } //! 
Parallel prefix with simple_partitioner /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_scan_function && parallel_scan_combine) Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, const simple_partitioner& partitioner ) { lambda_scan_body body(identity, scan, reverse_join); parallel_scan(range, body, partitioner); return body.result(); } //! Parallel prefix with auto_partitioner /** @ingroup algorithms **/ template __TBB_requires(tbb_range && parallel_scan_function && parallel_scan_combine) Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, const auto_partitioner& partitioner ) { lambda_scan_body body(identity, scan, reverse_join); parallel_scan(range, body, partitioner); return body.result(); } } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::parallel_scan; using detail::d1::pre_scan_tag; using detail::d1::final_scan_tag; } // namespace v1 } // namespace tbb #endif /* __TBB_parallel_scan_H */ level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/parallel_sort.h000066400000000000000000000264301514453371700316210ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #ifndef __TBB_parallel_sort_H #define __TBB_parallel_sort_H #include "detail/_namespace_injection.h" #include "parallel_for.h" #include "blocked_range.h" #include "profiling.h" #include #include #include #include namespace tbb { namespace detail { #if __TBB_CPP20_CONCEPTS_PRESENT inline namespace d0 { // TODO: consider using std::strict_weak_order concept template concept compare = requires( const std::remove_reference_t& comp, typename std::iterator_traits::reference value ) { // Forward via iterator_traits::reference { comp(typename std::iterator_traits::reference(value), typename std::iterator_traits::reference(value)) } -> std::convertible_to; }; // Inspired by std::__PartiallyOrderedWith exposition only concept template concept less_than_comparable = requires( const std::remove_reference_t& lhs, const std::remove_reference_t& rhs ) { { lhs < rhs } -> boolean_testable; }; } // namespace d0 #endif // __TBB_CPP20_CONCEPTS_PRESENT namespace d1 { //! Range used in quicksort to split elements into subranges based on a value. /** The split operation selects a splitter and places all elements less than or equal to the value in the first range and the remaining elements in the second range. @ingroup algorithms */ template class quick_sort_range { std::size_t median_of_three( const RandomAccessIterator& array, std::size_t l, std::size_t m, std::size_t r ) const { return comp(array[l], array[m]) ? ( comp(array[m], array[r]) ? m : ( comp(array[l], array[r]) ? r : l ) ) : ( comp(array[r], array[m]) ? m : ( comp(array[r], array[l]) ? 
r : l ) ); } std::size_t pseudo_median_of_nine( const RandomAccessIterator& array, const quick_sort_range& range ) const { std::size_t offset = range.size / 8u; return median_of_three(array, median_of_three(array, 0 , offset, offset * 2), median_of_three(array, offset * 3, offset * 4, offset * 5), median_of_three(array, offset * 6, offset * 7, range.size - 1)); } std::size_t split_range( quick_sort_range& range ) { RandomAccessIterator array = range.begin; RandomAccessIterator first_element = range.begin; std::size_t m = pseudo_median_of_nine(array, range); if( m != 0 ) std::iter_swap(array, array + m); std::size_t i = 0; std::size_t j = range.size; // Partition interval [i + 1,j - 1] with key *first_element. for(;;) { __TBB_ASSERT( i < j, nullptr ); // Loop must terminate since array[l] == *first_element. do { --j; __TBB_ASSERT( i <= j, "bad ordering relation?" ); } while( comp(*first_element, array[j]) ); do { __TBB_ASSERT( i <= j, nullptr ); if( i == j ) goto partition; ++i; } while( comp(array[i], *first_element) ); if( i == j ) goto partition; std::iter_swap(array + i, array + j); } partition: // Put the partition key were it belongs std::iter_swap(array + j, first_element); // array[l..j) is less or equal to key. // array(j..r) is greater or equal to key. 
// array[j] is equal to key i = j + 1; std::size_t new_range_size = range.size - i; range.size = j; return new_range_size; } public: quick_sort_range() = default; quick_sort_range( const quick_sort_range& ) = default; void operator=( const quick_sort_range& ) = delete; static constexpr std::size_t grainsize = 500; const Compare& comp; std::size_t size; RandomAccessIterator begin; quick_sort_range( RandomAccessIterator begin_, std::size_t size_, const Compare& comp_ ) : comp(comp_), size(size_), begin(begin_) {} bool empty() const { return size == 0; } bool is_divisible() const { return size >= grainsize; } quick_sort_range( quick_sort_range& range, split ) : comp(range.comp) , size(split_range(range)) // +1 accounts for the pivot element, which is at its correct place // already and, therefore, is not included into subranges. , begin(range.begin + range.size + 1) {} }; //! Body class used to test if elements in a range are presorted /** @ingroup algorithms */ template class quick_sort_pretest_body { const Compare& comp; task_group_context& context; public: quick_sort_pretest_body() = default; quick_sort_pretest_body( const quick_sort_pretest_body& ) = default; void operator=( const quick_sort_pretest_body& ) = delete; quick_sort_pretest_body( const Compare& _comp, task_group_context& _context ) : comp(_comp), context(_context) {} void operator()( const blocked_range& range ) const { RandomAccessIterator my_end = range.end(); int i = 0; //TODO: consider using std::is_sorted() for each 64 iterations (requires performance measurements) for( RandomAccessIterator k = range.begin(); k != my_end; ++k, ++i ) { if( i % 64 == 0 && context.is_group_execution_cancelled() ) break; // The k - 1 is never out-of-range because the first chunk starts at begin+serial_cutoff+1 if( comp(*(k), *(k - 1)) ) { context.cancel_group_execution(); break; } } } }; //! Body class used to sort elements in a range that is smaller than the grainsize. 
/** @ingroup algorithms */ template struct quick_sort_body { void operator()( const quick_sort_range& range ) const { std::sort(range.begin, range.begin + range.size, range.comp); } }; //! Method to perform parallel_for based quick sort. /** @ingroup algorithms */ template void do_parallel_quick_sort( RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp ) { parallel_for(quick_sort_range(begin, end - begin, comp), quick_sort_body(), auto_partitioner()); } //! Wrapper method to initiate the sort by calling parallel_for. /** @ingroup algorithms */ template void parallel_quick_sort( RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp ) { task_group_context my_context(PARALLEL_SORT); constexpr int serial_cutoff = 9; __TBB_ASSERT( begin + serial_cutoff < end, "min_parallel_size is smaller than serial cutoff?" ); RandomAccessIterator k = begin; for( ; k != begin + serial_cutoff; ++k ) { if( comp(*(k + 1), *k) ) { do_parallel_quick_sort(begin, end, comp); return; } } // Check is input range already sorted parallel_for(blocked_range(k + 1, end), quick_sort_pretest_body(comp, my_context), auto_partitioner(), my_context); if( my_context.is_group_execution_cancelled() ) do_parallel_quick_sort(begin, end, comp); } /** \page parallel_sort_iter_req Requirements on iterators for parallel_sort Requirements on the iterator type \c It and its value type \c T for \c parallel_sort: - \code void iter_swap( It a, It b ) \endcode Swaps the values of the elements the given iterators \c a and \c b are pointing to. \c It should be a random access iterator. - \code bool Compare::operator()( const T& x, const T& y ) \endcode True if x comes before y; **/ /** \name parallel_sort See also requirements on \ref parallel_sort_iter_req "iterators for parallel_sort". 
**/ //@{ #if __TBB_CPP20_CONCEPTS_PRESENT template using iter_value_type = typename std::iterator_traits::value_type; template using range_value_type = typename std::iterator_traits>::value_type; #endif //! Sorts the data in [begin,end) using the given comparator /** The compare function object is used for all comparisons between elements during sorting. The compare object must define a bool operator() function. @ingroup algorithms **/ template __TBB_requires(std::random_access_iterator && compare && std::movable>) void parallel_sort( RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp ) { constexpr int min_parallel_size = 500; if( end > begin ) { if( end - begin < min_parallel_size ) { std::sort(begin, end, comp); } else { parallel_quick_sort(begin, end, comp); } } } //! Sorts the data in [begin,end) with a default comparator \c std::less /** @ingroup algorithms **/ template __TBB_requires(std::random_access_iterator && less_than_comparable> && std::movable>) void parallel_sort( RandomAccessIterator begin, RandomAccessIterator end ) { parallel_sort(begin, end, std::less::value_type>()); } //! Sorts the data in rng using the given comparator /** @ingroup algorithms **/ template __TBB_requires(container_based_sequence && compare> && std::movable>) void parallel_sort( Range&& rng, const Compare& comp ) { parallel_sort(std::begin(rng), std::end(rng), comp); } //! 
Sorts the data in rng with a default comparator \c std::less /** @ingroup algorithms **/ template __TBB_requires(container_based_sequence && less_than_comparable> && std::movable>) void parallel_sort( Range&& rng ) { parallel_sort(std::begin(rng), std::end(rng)); } //@} } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::parallel_sort; } // namespace v1 } // namespace tbb #endif /*__TBB_parallel_sort_H*/ level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/partitioner.h000066400000000000000000000672011514453371700313170ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #ifndef __TBB_partitioner_H #define __TBB_partitioner_H #ifndef __TBB_INITIAL_CHUNKS // initial task divisions per thread #define __TBB_INITIAL_CHUNKS 2 #endif #ifndef __TBB_RANGE_POOL_CAPACITY // maximum number of elements in range pool #define __TBB_RANGE_POOL_CAPACITY 8 #endif #ifndef __TBB_INIT_DEPTH // initial value for depth of range pool #define __TBB_INIT_DEPTH 5 #endif #ifndef __TBB_DEMAND_DEPTH_ADD // when imbalance is found range splits this value times more #define __TBB_DEMAND_DEPTH_ADD 1 #endif #include "detail/_config.h" #include "detail/_namespace_injection.h" #include "detail/_aligned_space.h" #include "detail/_utils.h" #include "detail/_template_helpers.h" #include "detail/_range_common.h" #include "detail/_task.h" #include "detail/_small_object_pool.h" #include "cache_aligned_allocator.h" #include "task_group.h" // task_group_context #include "task_arena.h" #include #include #include #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) // Workaround for overzealous compiler warnings #pragma warning (push) #pragma warning (disable: 4244) #endif namespace tbb { namespace detail { namespace d1 { class auto_partitioner; class simple_partitioner; class static_partitioner; class affinity_partitioner; class affinity_partition_type; class affinity_partitioner_base; inline std::size_t get_initial_auto_partitioner_divisor() { const std::size_t factor = 4; return factor * max_concurrency(); } //! Defines entry point for affinity partitioner into oneTBB run-time library. class affinity_partitioner_base: no_copy { friend class affinity_partitioner; friend class affinity_partition_type; //! Array that remembers affinities of tree positions to affinity_id. /** NULL if my_size==0. */ slot_id* my_array; //! Number of elements in my_array. std::size_t my_size; //! Zeros the fields. affinity_partitioner_base() : my_array(nullptr), my_size(0) {} //! Deallocates my_array. ~affinity_partitioner_base() { resize(0); } //! Resize my_array. 
/** Retains values if resulting size is the same. */ void resize(unsigned factor) { // Check factor to avoid asking for number of workers while there might be no arena. unsigned max_threads_in_arena = max_concurrency(); std::size_t new_size = factor ? factor * max_threads_in_arena : 0; if (new_size != my_size) { if (my_array) { r1::cache_aligned_deallocate(my_array); // Following two assignments must be done here for sake of exception safety. my_array = nullptr; my_size = 0; } if (new_size) { my_array = static_cast(r1::cache_aligned_allocate(new_size * sizeof(slot_id))); std::fill_n(my_array, new_size, no_slot); my_size = new_size; } } } }; template struct start_for; template struct start_scan; template struct start_reduce; template struct start_deterministic_reduce; struct node { node* my_parent{}; std::atomic m_ref_count{}; node() = default; node(node* parent, int ref_count) : my_parent{parent}, m_ref_count{ref_count} { __TBB_ASSERT(ref_count > 0, "The ref count must be positive"); } }; struct wait_node : node { wait_node() : node{ nullptr, 1 } {} wait_context m_wait{1}; }; //! 
Join task node that contains shared flag for stealing feedback struct tree_node : public node { small_object_allocator m_allocator; std::atomic m_child_stolen{false}; tree_node(node* parent, int ref_count, small_object_allocator& alloc) : node{parent, ref_count} , m_allocator{alloc} {} void join(task_group_context*) {/*dummy, required only for reduction algorithms*/}; template static void mark_task_stolen(Task &t) { std::atomic &flag = static_cast(t.my_parent)->m_child_stolen; #if TBB_USE_PROFILING_TOOLS // Threading tools respect lock prefix but report false-positive data-race via plain store flag.exchange(true); #else flag.store(true, std::memory_order_relaxed); #endif // TBB_USE_PROFILING_TOOLS } template static bool is_peer_stolen(Task &t) { return static_cast(t.my_parent)->m_child_stolen.load(std::memory_order_relaxed); } }; // Context used to check cancellation state during reduction join process template void fold_tree(node* n, const execution_data& ed) { for (;;) { __TBB_ASSERT(n->m_ref_count.load(std::memory_order_relaxed) > 0, "The refcount must be positive."); call_itt_task_notify(releasing, n); if (--n->m_ref_count > 0) { return; } node* parent = n->my_parent; if (!parent) { break; }; call_itt_task_notify(acquired, n); TreeNodeType* self = static_cast(n); self->join(ed.context); self->m_allocator.delete_object(self, ed); n = parent; } // Finish parallel for execution when the root (last node) is reached static_cast(n)->m_wait.release(); } //! Depth is a relative depth of recursive division inside a range pool. Relative depth allows //! infinite absolute depth of the recursion for heavily unbalanced workloads with range represented //! by a number that cannot fit into machine word. typedef unsigned char depth_t; //! 
Range pool stores ranges of type T in a circular buffer with MaxCapacity template class range_vector { depth_t my_head; depth_t my_tail; depth_t my_size; depth_t my_depth[MaxCapacity]; // relative depths of stored ranges tbb::detail::aligned_space my_pool; public: //! initialize via first range in pool range_vector(const T& elem) : my_head(0), my_tail(0), my_size(1) { my_depth[0] = 0; new( static_cast(my_pool.begin()) ) T(elem);//TODO: std::move? } ~range_vector() { while( !empty() ) pop_back(); } bool empty() const { return my_size == 0; } depth_t size() const { return my_size; } //! Populates range pool via ranges up to max depth or while divisible //! max_depth starts from 0, e.g. value 2 makes 3 ranges in the pool up to two 1/4 pieces void split_to_fill(depth_t max_depth) { while( my_size < MaxCapacity && is_divisible(max_depth) ) { depth_t prev = my_head; my_head = (my_head + 1) % MaxCapacity; new(my_pool.begin()+my_head) T(my_pool.begin()[prev]); // copy TODO: std::move? my_pool.begin()[prev].~T(); // instead of assignment new(my_pool.begin()+prev) T(my_pool.begin()[my_head], detail::split()); // do 'inverse' split my_depth[my_head] = ++my_depth[prev]; my_size++; } } void pop_back() { __TBB_ASSERT(my_size > 0, "range_vector::pop_back() with empty size"); my_pool.begin()[my_head].~T(); my_size--; my_head = (my_head + MaxCapacity - 1) % MaxCapacity; } void pop_front() { __TBB_ASSERT(my_size > 0, "range_vector::pop_front() with empty size"); my_pool.begin()[my_tail].~T(); my_size--; my_tail = (my_tail + 1) % MaxCapacity; } T& back() { __TBB_ASSERT(my_size > 0, "range_vector::back() with empty size"); return my_pool.begin()[my_head]; } T& front() { __TBB_ASSERT(my_size > 0, "range_vector::front() with empty size"); return my_pool.begin()[my_tail]; } //! 
similarly to front(), returns depth of the first range in the pool depth_t front_depth() { __TBB_ASSERT(my_size > 0, "range_vector::front_depth() with empty size"); return my_depth[my_tail]; } depth_t back_depth() { __TBB_ASSERT(my_size > 0, "range_vector::back_depth() with empty size"); return my_depth[my_head]; } bool is_divisible(depth_t max_depth) { return back_depth() < max_depth && back().is_divisible(); } }; //! Provides default methods for partition objects and common algorithm blocks. template struct partition_type_base { typedef detail::split split_type; // decision makers void note_affinity( slot_id ) {} template bool check_being_stolen(Task&, const execution_data&) { return false; } // part of old should_execute_range() template split_type get_split() { return split(); } Partition& self() { return *static_cast(this); } // CRTP helper template void work_balance(StartType &start, Range &range, const execution_data&) { start.run_body( range ); // static partitioner goes here } template void execute(StartType &start, Range &range, execution_data& ed) { // The algorithm in a few words ([]-denotes calls to decision methods of partitioner): // [If this task is stolen, adjust depth and divisions if necessary, set flag]. // If range is divisible { // Spread the work while [initial divisions left]; // Create trap task [if necessary]; // } // If not divisible or [max depth is reached], execute, else do the range pool part if ( range.is_divisible() ) { if ( self().is_divisible() ) { do { // split until is divisible typename Partition::split_type split_obj = self().template get_split(); start.offer_work( split_obj, ed ); } while ( range.is_divisible() && self().is_divisible() ); } } self().work_balance(start, range, ed); } }; //! Provides default splitting strategy for partition objects. 
template struct adaptive_mode : partition_type_base { typedef Partition my_partition; std::size_t my_divisor; // For affinity_partitioner, my_divisor indicates the number of affinity array indices the task reserves. // A task which has only one index must produce the right split without reserved index in order to avoid // it to be overwritten in note_affinity() of the created (right) task. // I.e. a task created deeper than the affinity array can remember must not save its affinity (LIFO order) static const unsigned factor = 1; adaptive_mode() : my_divisor(get_initial_auto_partitioner_divisor() / 4 * my_partition::factor) {} adaptive_mode(adaptive_mode &src, split) : my_divisor(do_split(src, split())) {} adaptive_mode(adaptive_mode&, const proportional_split&) : my_divisor(0) { // left blank as my_divisor gets overridden in the successors' constructors } /*! Override do_split methods in order to specify splitting strategy */ std::size_t do_split(adaptive_mode &src, split) { return src.my_divisor /= 2u; } }; //! Helper type for checking availability of proportional_split constructor template using supports_proportional_splitting = typename std::is_constructible; //! A helper class to create a proportional_split object for a given type of Range. /** If the Range has proportional_split constructor, then created object splits a provided value in an implemenation-defined proportion; otherwise it represents equal-size split. */ // TODO: check if this helper can be a nested class of proportional_mode. template struct proportion_helper { static proportional_split get_split(std::size_t) { return proportional_split(1,1); } }; template struct proportion_helper::value>::type> { static proportional_split get_split(std::size_t n) { std::size_t right = n / 2; std::size_t left = n - right; return proportional_split(left, right); } }; //! 
Provides proportional splitting strategy for partition objects template struct proportional_mode : adaptive_mode { typedef Partition my_partition; using partition_type_base::self; // CRTP helper to get access to derived classes proportional_mode() : adaptive_mode() {} proportional_mode(proportional_mode &src, split) : adaptive_mode(src, split()) {} proportional_mode(proportional_mode &src, const proportional_split& split_obj) : adaptive_mode(src, split_obj) { self().my_divisor = do_split(src, split_obj); } std::size_t do_split(proportional_mode &src, const proportional_split& split_obj) { std::size_t portion = split_obj.right() * my_partition::factor; portion = (portion + my_partition::factor/2) & (0ul - my_partition::factor); src.my_divisor -= portion; return portion; } bool is_divisible() { // part of old should_execute_range() return self().my_divisor > my_partition::factor; } template proportional_split get_split() { // Create a proportion for the number of threads expected to handle "this" subrange return proportion_helper::get_split( self().my_divisor / my_partition::factor ); } }; static std::size_t get_initial_partition_head() { int current_index = tbb::this_task_arena::current_thread_index(); if (current_index == tbb::task_arena::not_initialized) current_index = 0; return size_t(current_index); } //! 
Provides default linear indexing of partitioner's sequence template struct linear_affinity_mode : proportional_mode { std::size_t my_head; std::size_t my_max_affinity; using proportional_mode::self; linear_affinity_mode() : proportional_mode(), my_head(get_initial_partition_head()), my_max_affinity(self().my_divisor) {} linear_affinity_mode(linear_affinity_mode &src, split) : proportional_mode(src, split()) , my_head((src.my_head + src.my_divisor) % src.my_max_affinity), my_max_affinity(src.my_max_affinity) {} linear_affinity_mode(linear_affinity_mode &src, const proportional_split& split_obj) : proportional_mode(src, split_obj) , my_head((src.my_head + src.my_divisor) % src.my_max_affinity), my_max_affinity(src.my_max_affinity) {} void spawn_task(task& t, task_group_context& ctx) { if (self().my_divisor) { spawn(t, ctx, slot_id(my_head)); } else { spawn(t, ctx); } } }; static bool is_stolen_task(const execution_data& ed) { return execution_slot(ed) != original_slot(ed); } /*! Determine work-balance phase implementing splitting & stealing actions */ template struct dynamic_grainsize_mode : Mode { using Mode::self; enum { begin = 0, run, pass } my_delay; depth_t my_max_depth; static const unsigned range_pool_size = __TBB_RANGE_POOL_CAPACITY; dynamic_grainsize_mode(): Mode() , my_delay(begin) , my_max_depth(__TBB_INIT_DEPTH) {} dynamic_grainsize_mode(dynamic_grainsize_mode& p, split) : Mode(p, split()) , my_delay(pass) , my_max_depth(p.my_max_depth) {} dynamic_grainsize_mode(dynamic_grainsize_mode& p, const proportional_split& split_obj) : Mode(p, split_obj) , my_delay(begin) , my_max_depth(p.my_max_depth) {} template bool check_being_stolen(Task &t, const execution_data& ed) { // part of old should_execute_range() if( !(self().my_divisor / Mode::my_partition::factor) ) { // if not from the top P tasks of binary tree self().my_divisor = 1; // TODO: replace by on-stack flag (partition_state's member)? 
if( is_stolen_task(ed) && t.my_parent->m_ref_count >= 2 ) { // runs concurrently with the left task #if __TBB_USE_OPTIONAL_RTTI // RTTI is available, check whether the cast is valid // TODO: TBB_REVAMP_TODO __TBB_ASSERT(dynamic_cast(t.m_parent), 0); // correctness of the cast relies on avoiding the root task for which: // - initial value of my_divisor != 0 (protected by separate assertion) // - is_stolen_task() always returns false for the root task. #endif tree_node::mark_task_stolen(t); if( !my_max_depth ) my_max_depth++; my_max_depth += __TBB_DEMAND_DEPTH_ADD; return true; } } return false; } depth_t max_depth() { return my_max_depth; } void align_depth(depth_t base) { __TBB_ASSERT(base <= my_max_depth, 0); my_max_depth -= base; } template void work_balance(StartType &start, Range &range, execution_data& ed) { if( !range.is_divisible() || !self().max_depth() ) { start.run_body( range ); } else { // do range pool range_vector range_pool(range); do { range_pool.split_to_fill(self().max_depth()); // fill range pool if( self().check_for_demand( start ) ) { if( range_pool.size() > 1 ) { start.offer_work( range_pool.front(), range_pool.front_depth(), ed ); range_pool.pop_front(); continue; } if( range_pool.is_divisible(self().max_depth()) ) // was not enough depth to fork a task continue; // note: next split_to_fill() should split range at least once } start.run_body( range_pool.back() ); range_pool.pop_back(); } while( !range_pool.empty() && !ed.context->is_group_execution_cancelled() ); } } template bool check_for_demand(Task& t) { if ( pass == my_delay ) { if ( self().my_divisor > 1 ) // produce affinitized tasks while they have slot in array return true; // do not do my_max_depth++ here, but be sure range_pool is splittable once more else if ( self().my_divisor && my_max_depth ) { // make balancing task self().my_divisor = 0; // once for each task; depth will be decreased in align_depth() return true; } else if ( tree_node::is_peer_stolen(t) ) { my_max_depth += 
__TBB_DEMAND_DEPTH_ADD; return true; } } else if( begin == my_delay ) { my_delay = pass; } return false; } }; class auto_partition_type: public dynamic_grainsize_mode > { public: auto_partition_type( const auto_partitioner& ) { my_divisor *= __TBB_INITIAL_CHUNKS; } auto_partition_type( auto_partition_type& src, split) : dynamic_grainsize_mode >(src, split()) {} bool is_divisible() { // part of old should_execute_range() if( my_divisor > 1 ) return true; if( my_divisor && my_max_depth ) { // can split the task. TODO: on-stack flag instead // keep same fragmentation while splitting for the local task pool my_max_depth--; my_divisor = 0; // decrease max_depth once per task return true; } else return false; } template bool check_for_demand(Task& t) { if (tree_node::is_peer_stolen(t)) { my_max_depth += __TBB_DEMAND_DEPTH_ADD; return true; } else return false; } void spawn_task(task& t, task_group_context& ctx) { spawn(t, ctx); } }; class simple_partition_type: public partition_type_base { public: simple_partition_type( const simple_partitioner& ) {} simple_partition_type( const simple_partition_type&, split ) {} //! 
simplified algorithm template void execute(StartType &start, Range &range, execution_data& ed) { split_type split_obj = split(); // start.offer_work accepts split_type as reference while( range.is_divisible() ) start.offer_work( split_obj, ed ); start.run_body( range ); } void spawn_task(task& t, task_group_context& ctx) { spawn(t, ctx); } }; class static_partition_type : public linear_affinity_mode { public: typedef detail::proportional_split split_type; static_partition_type( const static_partitioner& ) {} static_partition_type( static_partition_type& p, const proportional_split& split_obj ) : linear_affinity_mode(p, split_obj) {} }; class affinity_partition_type : public dynamic_grainsize_mode > { static const unsigned factor_power = 4; // TODO: get a unified formula based on number of computing units slot_id* my_array; public: static const unsigned factor = 1 << factor_power; // number of slots in affinity array per task typedef detail::proportional_split split_type; affinity_partition_type( affinity_partitioner_base& ap ) { __TBB_ASSERT( (factor&(factor-1))==0, "factor must be power of two" ); ap.resize(factor); my_array = ap.my_array; my_max_depth = factor_power + 1; __TBB_ASSERT( my_max_depth < __TBB_RANGE_POOL_CAPACITY, 0 ); } affinity_partition_type(affinity_partition_type& p, split) : dynamic_grainsize_mode >(p, split()) , my_array(p.my_array) {} affinity_partition_type(affinity_partition_type& p, const proportional_split& split_obj) : dynamic_grainsize_mode >(p, split_obj) , my_array(p.my_array) {} void note_affinity(slot_id id) { if( my_divisor ) my_array[my_head] = id; } void spawn_task(task& t, task_group_context& ctx) { if (my_divisor) { if (!my_array[my_head]) { // TODO: consider new ideas with my_array for both affinity and static partitioner's, then code reuse spawn(t, ctx, slot_id(my_head / factor)); } else { spawn(t, ctx, my_array[my_head]); } } else { spawn(t, ctx); } } }; //! 
A simple partitioner /** Divides the range until the range is not divisible. @ingroup algorithms */ class simple_partitioner { public: simple_partitioner() {} private: template friend struct start_for; template friend struct start_reduce; template friend struct start_deterministic_reduce; template friend struct start_scan; // new implementation just extends existing interface typedef simple_partition_type task_partition_type; // TODO: consider to make split_type public typedef simple_partition_type::split_type split_type; // for parallel_scan only class partition_type { public: bool should_execute_range(const execution_data& ) {return false;} partition_type( const simple_partitioner& ) {} partition_type( const partition_type&, split ) {} }; }; //! An auto partitioner /** The range is initial divided into several large chunks. Chunks are further subdivided into smaller pieces if demand detected and they are divisible. @ingroup algorithms */ class auto_partitioner { public: auto_partitioner() {} private: template friend struct start_for; template friend struct start_reduce; template friend struct start_deterministic_reduce; template friend struct start_scan; // new implementation just extends existing interface typedef auto_partition_type task_partition_type; // TODO: consider to make split_type public typedef auto_partition_type::split_type split_type; //! Backward-compatible partition for auto and affinity partition objects. class partition_type { size_t num_chunks; static const size_t VICTIM_CHUNKS = 4; public: bool should_execute_range(const execution_data& ed) { if( num_chunks friend struct start_for; template friend struct start_reduce; template friend struct start_deterministic_reduce; template friend struct start_scan; // new implementation just extends existing interface typedef static_partition_type task_partition_type; // TODO: consider to make split_type public typedef static_partition_type::split_type split_type; }; //! 
An affinity partitioner class affinity_partitioner : affinity_partitioner_base { public: affinity_partitioner() {} private: template friend struct start_for; template friend struct start_reduce; template friend struct start_deterministic_reduce; template friend struct start_scan; // new implementation just extends existing interface typedef affinity_partition_type task_partition_type; // TODO: consider to make split_type public typedef affinity_partition_type::split_type split_type; }; } // namespace d1 } // namespace detail inline namespace v1 { // Partitioners using detail::d1::auto_partitioner; using detail::d1::simple_partitioner; using detail::d1::static_partitioner; using detail::d1::affinity_partitioner; // Split types using detail::split; using detail::proportional_split; } // namespace v1 } // namespace tbb #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) #pragma warning (pop) #endif // warning 4244 is back #undef __TBB_INITIAL_CHUNKS #undef __TBB_RANGE_POOL_CAPACITY #undef __TBB_INIT_DEPTH #endif /* __TBB_partitioner_H */ level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/profiling.h000066400000000000000000000234251514453371700307500ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #ifndef __TBB_profiling_H #define __TBB_profiling_H #include "detail/_config.h" #include #include namespace tbb { namespace detail { inline namespace d0 { // include list of index names #define TBB_STRING_RESOURCE(index_name,str) index_name, enum string_resource_index : std::uintptr_t { #include "detail/_string_resource.h" NUM_STRINGS }; #undef TBB_STRING_RESOURCE enum itt_relation { __itt_relation_is_unknown = 0, __itt_relation_is_dependent_on, /**< "A is dependent on B" means that A cannot start until B completes */ __itt_relation_is_sibling_of, /**< "A is sibling of B" means that A and B were created as a group */ __itt_relation_is_parent_of, /**< "A is parent of B" means that A created B */ __itt_relation_is_continuation_of, /**< "A is continuation of B" means that A assumes the dependencies of B */ __itt_relation_is_child_of, /**< "A is child of B" means that A was created by B (inverse of is_parent_of) */ __itt_relation_is_continued_by, /**< "A is continued by B" means that B assumes the dependencies of A (inverse of is_continuation_of) */ __itt_relation_is_predecessor_to /**< "A is predecessor to B" means that B cannot start until A completes (inverse of is_dependent_on) */ }; //! Unicode support #if (_WIN32||_WIN64) //! Unicode character type. Always wchar_t on Windows. 
using tchar = wchar_t; #else /* !WIN */ using tchar = char; #endif /* !WIN */ } // namespace d0 } // namespace detail } // namespace tbb #include #if _WIN32||_WIN64 #include /* mbstowcs_s */ #endif // Need these to work regardless of tools support namespace tbb { namespace detail { namespace d1 { enum notify_type {prepare=0, cancel, acquired, releasing, destroy}; enum itt_domain_enum { ITT_DOMAIN_FLOW=0, ITT_DOMAIN_MAIN=1, ITT_DOMAIN_ALGO=2, ITT_NUM_DOMAINS }; } // namespace d1 namespace r1 { TBB_EXPORT void __TBB_EXPORTED_FUNC call_itt_notify(int t, void* ptr); TBB_EXPORT void __TBB_EXPORTED_FUNC create_itt_sync(void* ptr, const tchar* objtype, const tchar* objname); TBB_EXPORT void __TBB_EXPORTED_FUNC itt_make_task_group(d1::itt_domain_enum domain, void* group, unsigned long long group_extra, void* parent, unsigned long long parent_extra, string_resource_index name_index); TBB_EXPORT void __TBB_EXPORTED_FUNC itt_task_begin(d1::itt_domain_enum domain, void* task, unsigned long long task_extra, void* parent, unsigned long long parent_extra, string_resource_index name_index); TBB_EXPORT void __TBB_EXPORTED_FUNC itt_task_end(d1::itt_domain_enum domain); TBB_EXPORT void __TBB_EXPORTED_FUNC itt_set_sync_name(void* obj, const tchar* name); TBB_EXPORT void __TBB_EXPORTED_FUNC itt_metadata_str_add(d1::itt_domain_enum domain, void* addr, unsigned long long addr_extra, string_resource_index key, const char* value); TBB_EXPORT void __TBB_EXPORTED_FUNC itt_metadata_ptr_add(d1::itt_domain_enum domain, void* addr, unsigned long long addr_extra, string_resource_index key, void* value); TBB_EXPORT void __TBB_EXPORTED_FUNC itt_relation_add(d1::itt_domain_enum domain, void* addr0, unsigned long long addr0_extra, itt_relation relation, void* addr1, unsigned long long addr1_extra); TBB_EXPORT void __TBB_EXPORTED_FUNC itt_region_begin(d1::itt_domain_enum domain, void* region, unsigned long long region_extra, void* parent, unsigned long long parent_extra, string_resource_index /* 
name_index */); TBB_EXPORT void __TBB_EXPORTED_FUNC itt_region_end(d1::itt_domain_enum domain, void* region, unsigned long long region_extra); } // namespace r1 namespace d1 { #if TBB_USE_PROFILING_TOOLS && (_WIN32||_WIN64) inline std::size_t multibyte_to_widechar(wchar_t* wcs, const char* mbs, std::size_t bufsize) { std::size_t len; mbstowcs_s(&len, wcs, bufsize, mbs, _TRUNCATE); return len; // mbstowcs_s counts null terminator } #endif #if TBB_USE_PROFILING_TOOLS inline void create_itt_sync(void *ptr, const char *objtype, const char *objname) { #if (_WIN32||_WIN64) std::size_t len_type = multibyte_to_widechar(nullptr, objtype, 0); wchar_t *type = new wchar_t[len_type]; multibyte_to_widechar(type, objtype, len_type); std::size_t len_name = multibyte_to_widechar(nullptr, objname, 0); wchar_t *name = new wchar_t[len_name]; multibyte_to_widechar(name, objname, len_name); #else // WIN const char *type = objtype; const char *name = objname; #endif r1::create_itt_sync(ptr, type, name); #if (_WIN32||_WIN64) delete[] type; delete[] name; #endif // WIN } // Distinguish notifications on task for reducing overheads #if TBB_USE_PROFILING_TOOLS == 2 inline void call_itt_task_notify(d1::notify_type t, void *ptr) { r1::call_itt_notify((int)t, ptr); } #else inline void call_itt_task_notify(d1::notify_type, void *) {} #endif // TBB_USE_PROFILING_TOOLS inline void call_itt_notify(d1::notify_type t, void *ptr) { r1::call_itt_notify((int)t, ptr); } #if (_WIN32||_WIN64) && !__MINGW32__ inline void itt_set_sync_name(void* obj, const wchar_t* name) { r1::itt_set_sync_name(obj, name); } inline void itt_set_sync_name(void* obj, const char* name) { std::size_t len_name = multibyte_to_widechar(nullptr, name, 0); wchar_t *obj_name = new wchar_t[len_name]; multibyte_to_widechar(obj_name, name, len_name); r1::itt_set_sync_name(obj, obj_name); delete[] obj_name; } #else inline void itt_set_sync_name( void* obj, const char* name) { r1::itt_set_sync_name(obj, name); } #endif //WIN inline void 
itt_make_task_group(itt_domain_enum domain, void* group, unsigned long long group_extra, void* parent, unsigned long long parent_extra, string_resource_index name_index) { r1::itt_make_task_group(domain, group, group_extra, parent, parent_extra, name_index); } inline void itt_metadata_str_add( itt_domain_enum domain, void *addr, unsigned long long addr_extra, string_resource_index key, const char *value ) { r1::itt_metadata_str_add( domain, addr, addr_extra, key, value ); } inline void register_node_addr(itt_domain_enum domain, void *addr, unsigned long long addr_extra, string_resource_index key, void *value) { r1::itt_metadata_ptr_add(domain, addr, addr_extra, key, value); } inline void itt_relation_add( itt_domain_enum domain, void *addr0, unsigned long long addr0_extra, itt_relation relation, void *addr1, unsigned long long addr1_extra ) { r1::itt_relation_add( domain, addr0, addr0_extra, relation, addr1, addr1_extra ); } inline void itt_task_begin( itt_domain_enum domain, void *task, unsigned long long task_extra, void *parent, unsigned long long parent_extra, string_resource_index name_index ) { r1::itt_task_begin( domain, task, task_extra, parent, parent_extra, name_index ); } inline void itt_task_end( itt_domain_enum domain ) { r1::itt_task_end( domain ); } inline void itt_region_begin( itt_domain_enum domain, void *region, unsigned long long region_extra, void *parent, unsigned long long parent_extra, string_resource_index name_index ) { r1::itt_region_begin( domain, region, region_extra, parent, parent_extra, name_index ); } inline void itt_region_end( itt_domain_enum domain, void *region, unsigned long long region_extra ) { r1::itt_region_end( domain, region, region_extra ); } #else inline void create_itt_sync(void* /*ptr*/, const char* /*objtype*/, const char* /*objname*/) {} inline void call_itt_notify(notify_type /*t*/, void* /*ptr*/) {} inline void call_itt_task_notify(notify_type /*t*/, void* /*ptr*/) {} #endif // TBB_USE_PROFILING_TOOLS #if 
TBB_USE_PROFILING_TOOLS && !(TBB_USE_PROFILING_TOOLS == 2) class event { /** This class supports user event traces through itt. Common use-case is tagging data flow graph tasks (data-id) and visualization by Intel Advisor Flow Graph Analyzer (FGA) **/ // TODO: Replace implementation by itt user event api. const std::string my_name; static void emit_trace(const std::string &input) { itt_metadata_str_add( ITT_DOMAIN_FLOW, NULL, FLOW_NULL, USER_EVENT, ( "FGA::DATAID::" + input ).c_str() ); } public: event(const std::string &input) : my_name( input ) { } void emit() { emit_trace(my_name); } static void emit(const std::string &description) { emit_trace(description); } }; #else // TBB_USE_PROFILING_TOOLS && !(TBB_USE_PROFILING_TOOLS == 2) // Using empty struct if user event tracing is disabled: struct event { event(const std::string &) { } void emit() { } static void emit(const std::string &) { } }; #endif // TBB_USE_PROFILING_TOOLS && !(TBB_USE_PROFILING_TOOLS == 2) } // namespace d1 } // namespace detail namespace profiling { using detail::d1::event; } } // namespace tbb #endif /* __TBB_profiling_H */ level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/queuing_mutex.h000066400000000000000000000143671514453371700316630ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #ifndef __TBB_queuing_mutex_H #define __TBB_queuing_mutex_H #include "detail/_namespace_injection.h" #include "detail/_assert.h" #include "detail/_utils.h" #include "detail/_mutex_common.h" #include "profiling.h" #include namespace tbb { namespace detail { namespace d1 { //! Queuing mutex with local-only spinning. /** @ingroup synchronization */ class queuing_mutex { public: //! Construct unacquired mutex. queuing_mutex() noexcept { create_itt_sync(this, "tbb::queuing_mutex", ""); }; queuing_mutex(const queuing_mutex&) = delete; queuing_mutex& operator=(const queuing_mutex&) = delete; //! The scoped locking pattern /** It helps to avoid the common problem of forgetting to release lock. It also nicely provides the "node" for queuing locks. */ class scoped_lock { //! Reset fields to mean "no lock held". void reset() { m_mutex = nullptr; } public: //! Construct lock that has not acquired a mutex. /** Equivalent to zero-initialization of *this. */ scoped_lock() = default; //! Acquire lock on given mutex. scoped_lock(queuing_mutex& m) { acquire(m); } //! Release lock (if lock is held). ~scoped_lock() { if (m_mutex) release(); } //! No Copy scoped_lock( const scoped_lock& ) = delete; scoped_lock& operator=( const scoped_lock& ) = delete; //! Acquire lock on given mutex. void acquire( queuing_mutex& m ) { __TBB_ASSERT(!m_mutex, "scoped_lock is already holding a mutex"); // Must set all fields before the exchange, because once the // exchange executes, *this becomes accessible to other threads. m_mutex = &m; m_next.store(nullptr, std::memory_order_relaxed); m_going.store(0U, std::memory_order_relaxed); // x86 compare exchange operation always has a strong fence // "sending" the fields initialized above to other processors. 
scoped_lock* pred = m.q_tail.exchange(this); if (pred) { call_itt_notify(prepare, &m); __TBB_ASSERT(pred->m_next.load(std::memory_order_relaxed) == nullptr, "the predecessor has another successor!"); pred->m_next.store(this, std::memory_order_release); spin_wait_while_eq(m_going, 0U); } call_itt_notify(acquired, &m); } //! Acquire lock on given mutex if free (i.e. non-blocking) bool try_acquire( queuing_mutex& m ) { __TBB_ASSERT(!m_mutex, "scoped_lock is already holding a mutex"); // Must set all fields before the compare_exchange_strong, because once the // compare_exchange_strong executes, *this becomes accessible to other threads. m_next.store(nullptr, std::memory_order_relaxed); m_going.store(0U, std::memory_order_relaxed); scoped_lock* expected = nullptr; // The compare_exchange_strong must have release semantics, because we are // "sending" the fields initialized above to other processors. // x86 compare exchange operation always has a strong fence if (!m.q_tail.compare_exchange_strong(expected, this, std::memory_order_acq_rel)) return false; m_mutex = &m; call_itt_notify(acquired, &m); return true; } //! Release lock. void release() { __TBB_ASSERT(this->m_mutex, "no lock acquired"); call_itt_notify(releasing, this->m_mutex); if (m_next.load(std::memory_order_relaxed) == nullptr) { scoped_lock* expected = this; if (m_mutex->q_tail.compare_exchange_strong(expected, nullptr)) { // this was the only item in the queue, and the queue is now empty. reset(); return; } // Someone in the queue spin_wait_while_eq(m_next, nullptr); } m_next.load(std::memory_order_acquire)->m_going.store(1U, std::memory_order_release); reset(); } private: //! The pointer to the mutex owned, or NULL if not holding a mutex. queuing_mutex* m_mutex{nullptr}; //! The pointer to the next competitor for a mutex std::atomic m_next{nullptr}; //! The local spin-wait variable /** Inverted (0 - blocked, 1 - acquired the mutex) for the sake of zero-initialization. 
Defining it as an entire word instead of a byte seems to help performance slightly. */ std::atomic m_going{0U}; }; // Mutex traits static constexpr bool is_rw_mutex = false; static constexpr bool is_recursive_mutex = false; static constexpr bool is_fair_mutex = true; private: //! The last competitor requesting the lock std::atomic q_tail{nullptr}; }; #if TBB_USE_PROFILING_TOOLS inline void set_name(queuing_mutex& obj, const char* name) { itt_set_sync_name(&obj, name); } #if (_WIN32||_WIN64) inline void set_name(queuing_mutex& obj, const wchar_t* name) { itt_set_sync_name(&obj, name); } #endif //WIN #else inline void set_name(queuing_mutex&, const char*) {} #if (_WIN32||_WIN64) inline void set_name(queuing_mutex&, const wchar_t*) {} #endif //WIN #endif } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::queuing_mutex; } // namespace v1 namespace profiling { using detail::d1::set_name; } } // namespace tbb #endif /* __TBB_queuing_mutex_H */ level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/queuing_rw_mutex.h000066400000000000000000000147471514453371700323750ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #ifndef __TBB_queuing_rw_mutex_H #define __TBB_queuing_rw_mutex_H #include "detail/_config.h" #include "detail/_namespace_injection.h" #include "detail/_assert.h" #include "detail/_mutex_common.h" #include "profiling.h" #include #include namespace tbb { namespace detail { namespace r1 { struct queuing_rw_mutex_impl; } namespace d1 { //! Queuing reader-writer mutex with local-only spinning. /** Adapted from Krieger, Stumm, et al. pseudocode at https://www.researchgate.net/publication/221083709_A_Fair_Fast_Scalable_Reader-Writer_Lock @ingroup synchronization */ class queuing_rw_mutex { friend r1::queuing_rw_mutex_impl; public: //! Construct unacquired mutex. queuing_rw_mutex() noexcept { create_itt_sync(this, "tbb::queuing_rw_mutex", ""); } //! Destructor asserts if the mutex is acquired, i.e. q_tail is non-NULL ~queuing_rw_mutex() { __TBB_ASSERT(q_tail.load(std::memory_order_relaxed) == nullptr, "destruction of an acquired mutex"); } //! No Copy queuing_rw_mutex(const queuing_rw_mutex&) = delete; queuing_rw_mutex& operator=(const queuing_rw_mutex&) = delete; //! The scoped locking pattern /** It helps to avoid the common problem of forgetting to release lock. It also nicely provides the "node" for queuing locks. */ class scoped_lock { friend r1::queuing_rw_mutex_impl; //! Initialize fields to mean "no lock held". void initialize() { my_mutex = nullptr; my_internal_lock.store(0, std::memory_order_relaxed); my_going.store(0, std::memory_order_relaxed); #if TBB_USE_ASSERT my_state = 0xFF; // Set to invalid state my_next.store(reinterpret_cast(reinterpret_cast(-1)), std::memory_order_relaxed); my_prev.store(reinterpret_cast(reinterpret_cast(-1)), std::memory_order_relaxed); #endif /* TBB_USE_ASSERT */ } public: //! Construct lock that has not acquired a mutex. /** Equivalent to zero-initialization of *this. */ scoped_lock() {initialize();} //! Acquire lock on given mutex. scoped_lock( queuing_rw_mutex& m, bool write=true ) { initialize(); acquire(m,write); } //! 
Release lock (if lock is held). ~scoped_lock() { if( my_mutex ) release(); } //! No Copy scoped_lock(const scoped_lock&) = delete; scoped_lock& operator=(const scoped_lock&) = delete; //! Acquire lock on given mutex. void acquire( queuing_rw_mutex& m, bool write=true ); //! Acquire lock on given mutex if free (i.e. non-blocking) bool try_acquire( queuing_rw_mutex& m, bool write=true ); //! Release lock. void release(); //! Upgrade reader to become a writer. /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ bool upgrade_to_writer(); //! Downgrade writer to become a reader. bool downgrade_to_reader(); bool is_writer() const; private: //! The pointer to the mutex owned, or NULL if not holding a mutex. queuing_rw_mutex* my_mutex; //! The 'pointer' to the previous and next competitors for a mutex std::atomic my_prev; std::atomic my_next; using state_t = unsigned char ; //! State of the request: reader, writer, active reader, other service states std::atomic my_state; //! The local spin-wait variable /** Corresponds to "spin" in the pseudocode but inverted for the sake of zero-initialization */ std::atomic my_going; //! A tiny internal lock std::atomic my_internal_lock; }; // Mutex traits static constexpr bool is_rw_mutex = true; static constexpr bool is_recursive_mutex = false; static constexpr bool is_fair_mutex = true; private: //! 
The last competitor requesting the lock std::atomic q_tail{nullptr}; }; #if TBB_USE_PROFILING_TOOLS inline void set_name(queuing_rw_mutex& obj, const char* name) { itt_set_sync_name(&obj, name); } #if (_WIN32||_WIN64) inline void set_name(queuing_rw_mutex& obj, const wchar_t* name) { itt_set_sync_name(&obj, name); } #endif //WIN #else inline void set_name(queuing_rw_mutex&, const char*) {} #if (_WIN32||_WIN64) inline void set_name(queuing_rw_mutex&, const wchar_t*) {} #endif //WIN #endif } // namespace d1 namespace r1 { TBB_EXPORT void acquire(d1::queuing_rw_mutex&, d1::queuing_rw_mutex::scoped_lock&, bool); TBB_EXPORT bool try_acquire(d1::queuing_rw_mutex&, d1::queuing_rw_mutex::scoped_lock&, bool); TBB_EXPORT void release(d1::queuing_rw_mutex::scoped_lock&); TBB_EXPORT bool upgrade_to_writer(d1::queuing_rw_mutex::scoped_lock&); TBB_EXPORT bool downgrade_to_reader(d1::queuing_rw_mutex::scoped_lock&); TBB_EXPORT bool is_writer(const d1::queuing_rw_mutex::scoped_lock&); } // namespace r1 namespace d1 { inline void queuing_rw_mutex::scoped_lock::acquire(queuing_rw_mutex& m,bool write) { r1::acquire(m, *this, write); } inline bool queuing_rw_mutex::scoped_lock::try_acquire(queuing_rw_mutex& m, bool write) { return r1::try_acquire(m, *this, write); } inline void queuing_rw_mutex::scoped_lock::release() { r1::release(*this); } inline bool queuing_rw_mutex::scoped_lock::upgrade_to_writer() { return r1::upgrade_to_writer(*this); } inline bool queuing_rw_mutex::scoped_lock::downgrade_to_reader() { return r1::downgrade_to_reader(*this); } inline bool queuing_rw_mutex::scoped_lock::is_writer() const { return r1::is_writer(*this); } } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::queuing_rw_mutex; } // namespace v1 namespace profiling { using detail::d1::set_name; } } // namespace tbb #endif /* __TBB_queuing_rw_mutex_H */ 
level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/rw_mutex.h000066400000000000000000000176271514453371700306400ustar00rootroot00000000000000/* Copyright (c) 2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_rw_mutex_H #define __TBB_rw_mutex_H #include "detail/_namespace_injection.h" #include "detail/_utils.h" #include "detail/_waitable_atomic.h" #include "detail/_scoped_lock.h" #include "detail/_mutex_common.h" #include "profiling.h" namespace tbb { namespace detail { namespace d1 { class rw_mutex { public: //! Constructors rw_mutex() noexcept : m_state(0) { create_itt_sync(this, "tbb::rw_mutex", ""); } //! Destructor ~rw_mutex() { __TBB_ASSERT(!m_state.load(std::memory_order_relaxed), "destruction of an acquired mutex"); } //! No Copy rw_mutex(const rw_mutex&) = delete; rw_mutex& operator=(const rw_mutex&) = delete; using scoped_lock = rw_scoped_lock; //! Mutex traits static constexpr bool is_rw_mutex = true; static constexpr bool is_recursive_mutex = false; static constexpr bool is_fair_mutex = false; //! Acquire lock void lock() { call_itt_notify(prepare, this); while (!try_lock()) { if (!(m_state.load(std::memory_order_relaxed) & WRITER_PENDING)) { // no pending writers m_state |= WRITER_PENDING; } auto wakeup_condition = [&] { return !(m_state.load(std::memory_order_relaxed) & BUSY); }; adaptive_wait_on_address(this, wakeup_condition, WRITER_CONTEXT); } call_itt_notify(acquired, this); } //! 
Try acquiring lock (non-blocking) /** Return true if lock acquired; false otherwise. */ bool try_lock() { // for a writer: only possible to acquire if no active readers or writers // Use relaxed memory fence is OK here because // Acquire memory fence guaranteed by compare_exchange_strong() state_type s = m_state.load(std::memory_order_relaxed); if (!(s & BUSY)) { // no readers, no writers; mask is 1..1101 if (m_state.compare_exchange_strong(s, WRITER)) { call_itt_notify(acquired, this); return true; // successfully stored writer flag } } return false; } //! Release lock void unlock() { call_itt_notify(releasing, this); state_type curr_state = (m_state &= READERS | WRITER_PENDING); // Returns current state if (curr_state & WRITER_PENDING) { r1::notify_by_address(this, WRITER_CONTEXT); } else { // It's possible that WRITER sleeps without WRITER_PENDING, // because other thread might clear this bit at upgrade() r1::notify_by_address_all(this); } } //! Lock shared ownership mutex void lock_shared() { call_itt_notify(prepare, this); while (!try_lock_shared()) { state_type has_writer = WRITER | WRITER_PENDING; auto wakeup_condition = [&] { return !(m_state.load(std::memory_order_relaxed) & has_writer); }; adaptive_wait_on_address(this, wakeup_condition, READER_CONTEXT); } __TBB_ASSERT(m_state.load(std::memory_order_relaxed) & READERS, "invalid state of a read lock: no readers"); } //! Try lock shared ownership mutex bool try_lock_shared() { // for a reader: acquire if no active or waiting writers // Use relaxed memory fence is OK here because // Acquire memory fence guaranteed by fetch_add() state_type has_writer = WRITER | WRITER_PENDING; if (!(m_state.load(std::memory_order_relaxed) & has_writer)) { if (m_state.fetch_add(ONE_READER) & has_writer) { m_state -= ONE_READER; r1::notify_by_address(this, WRITER_CONTEXT); } else { call_itt_notify(acquired, this); return true; // successfully stored increased number of readers } } return false; } //! 
Unlock shared ownership mutex void unlock_shared() { __TBB_ASSERT(m_state.load(std::memory_order_relaxed) & READERS, "invalid state of a read lock: no readers"); call_itt_notify(releasing, this); state_type curr_state = (m_state -= ONE_READER); // Returns current state if (curr_state & (WRITER_PENDING)) { r1::notify_by_address(this, WRITER_CONTEXT); } else { // It's possible that WRITER sleeps without WRITER_PENDING, // because other thread might clear this bit at upgrade() r1::notify_by_address_all(this); } } private: /** Internal non ISO C++ standard API **/ //! This API is used through the scoped_lock class //! Upgrade reader to become a writer. /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ bool upgrade() { state_type s = m_state.load(std::memory_order_relaxed); __TBB_ASSERT(s & READERS, "invalid state before upgrade: no readers "); // Check and set writer-pending flag. // Required conditions: either no pending writers, or we are the only reader // (with multiple readers and pending writer, another upgrade could have been requested) while ((s & READERS) == ONE_READER || !(s & WRITER_PENDING)) { if (m_state.compare_exchange_strong(s, s | WRITER | WRITER_PENDING)) { auto wakeup_condition = [&] { return (m_state.load(std::memory_order_relaxed) & READERS) == ONE_READER; }; while ((m_state.load(std::memory_order_relaxed) & READERS) != ONE_READER) { adaptive_wait_on_address(this, wakeup_condition, WRITER_CONTEXT); } __TBB_ASSERT((m_state.load(std::memory_order_relaxed) & (WRITER_PENDING|WRITER)) == (WRITER_PENDING | WRITER), "invalid state when upgrading to writer"); // Both new readers and writers are blocked at this time m_state -= (ONE_READER + WRITER_PENDING); return true; // successfully upgraded } } // Slow reacquire unlock_shared(); lock(); return false; } //! 
Downgrade writer to a reader void downgrade() { __TBB_ASSERT(m_state.load(std::memory_order_relaxed) & WRITER, nullptr), call_itt_notify(releasing, this); m_state += (ONE_READER - WRITER); if (!(m_state & WRITER_PENDING)) { r1::notify_by_address(this, READER_CONTEXT); } __TBB_ASSERT(m_state.load(std::memory_order_relaxed) & READERS, "invalid state after downgrade: no readers"); } using state_type = std::intptr_t; static constexpr state_type WRITER = 1; static constexpr state_type WRITER_PENDING = 2; static constexpr state_type READERS = ~(WRITER | WRITER_PENDING); static constexpr state_type ONE_READER = 4; static constexpr state_type BUSY = WRITER | READERS; using context_type = std::uintptr_t; static constexpr context_type WRITER_CONTEXT = 0; static constexpr context_type READER_CONTEXT = 1; friend scoped_lock; //! State of lock /** Bit 0 = writer is holding lock Bit 1 = request by a writer to acquire lock (hint to readers to wait) Bit 2..N = number of readers holding lock */ std::atomic m_state; }; // class rw_mutex } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::rw_mutex; } // namespace v1 } // namespace tbb #endif // __TBB_rw_mutex_H level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/scalable_allocator.h000066400000000000000000000266071514453371700325720ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #ifndef __TBB_scalable_allocator_H #define __TBB_scalable_allocator_H #ifdef __cplusplus #include "oneapi/tbb/detail/_config.h" #include "oneapi/tbb/detail/_utils.h" #include #include #include /* std::bad_alloc() */ #else #include "oneapi/tbb/detail/_export.h" #include /* Need ptrdiff_t and size_t from here. */ #if !defined(_MSC_VER) || defined(__clang__) #include /* Need intptr_t from here. */ #endif #endif #if __TBB_CPP17_MEMORY_RESOURCE_PRESENT #include #endif #ifdef __cplusplus extern "C" { #endif /* __cplusplus */ #if _MSC_VER #define __TBB_EXPORTED_FUNC __cdecl #else #define __TBB_EXPORTED_FUNC #endif /** The "malloc" analogue to allocate block of memory of size bytes. * @ingroup memory_allocation */ TBBMALLOC_EXPORT void* __TBB_EXPORTED_FUNC scalable_malloc(size_t size); /** The "free" analogue to discard a previously allocated piece of memory. @ingroup memory_allocation */ TBBMALLOC_EXPORT void __TBB_EXPORTED_FUNC scalable_free(void* ptr); /** The "realloc" analogue complementing scalable_malloc. @ingroup memory_allocation */ TBBMALLOC_EXPORT void* __TBB_EXPORTED_FUNC scalable_realloc(void* ptr, size_t size); /** The "calloc" analogue complementing scalable_malloc. @ingroup memory_allocation */ TBBMALLOC_EXPORT void* __TBB_EXPORTED_FUNC scalable_calloc(size_t nobj, size_t size); /** The "posix_memalign" analogue. @ingroup memory_allocation */ TBBMALLOC_EXPORT int __TBB_EXPORTED_FUNC scalable_posix_memalign(void** memptr, size_t alignment, size_t size); /** The "_aligned_malloc" analogue. @ingroup memory_allocation */ TBBMALLOC_EXPORT void* __TBB_EXPORTED_FUNC scalable_aligned_malloc(size_t size, size_t alignment); /** The "_aligned_realloc" analogue. @ingroup memory_allocation */ TBBMALLOC_EXPORT void* __TBB_EXPORTED_FUNC scalable_aligned_realloc(void* ptr, size_t size, size_t alignment); /** The "_aligned_free" analogue. 
@ingroup memory_allocation */ TBBMALLOC_EXPORT void __TBB_EXPORTED_FUNC scalable_aligned_free(void* ptr); /** The analogue of _msize/malloc_size/malloc_usable_size. Returns the usable size of a memory block previously allocated by scalable_*, or 0 (zero) if ptr does not point to such a block. @ingroup memory_allocation */ TBBMALLOC_EXPORT size_t __TBB_EXPORTED_FUNC scalable_msize(void* ptr); /* Results for scalable_allocation_* functions */ typedef enum { TBBMALLOC_OK, TBBMALLOC_INVALID_PARAM, TBBMALLOC_UNSUPPORTED, TBBMALLOC_NO_MEMORY, TBBMALLOC_NO_EFFECT } ScalableAllocationResult; /* Setting TBB_MALLOC_USE_HUGE_PAGES environment variable to 1 enables huge pages. scalable_allocation_mode call has priority over environment variable. */ typedef enum { TBBMALLOC_USE_HUGE_PAGES, /* value turns using huge pages on and off */ /* deprecated, kept for backward compatibility only */ USE_HUGE_PAGES = TBBMALLOC_USE_HUGE_PAGES, /* try to limit memory consumption value (Bytes), clean internal buffers if limit is exceeded, but not prevents from requesting memory from OS */ TBBMALLOC_SET_SOFT_HEAP_LIMIT, /* Lower bound for the size (Bytes), that is interpreted as huge * and not released during regular cleanup operations. */ TBBMALLOC_SET_HUGE_SIZE_THRESHOLD } AllocationModeParam; /** Set TBB allocator-specific allocation modes. @ingroup memory_allocation */ TBBMALLOC_EXPORT int __TBB_EXPORTED_FUNC scalable_allocation_mode(int param, intptr_t value); typedef enum { /* Clean internal allocator buffers for all threads. Returns TBBMALLOC_NO_EFFECT if no buffers cleaned, TBBMALLOC_OK if some memory released from buffers. */ TBBMALLOC_CLEAN_ALL_BUFFERS, /* Clean internal allocator buffer for current thread only. Return values same as for TBBMALLOC_CLEAN_ALL_BUFFERS. */ TBBMALLOC_CLEAN_THREAD_BUFFERS } ScalableAllocationCmd; /** Call TBB allocator-specific commands. 
@ingroup memory_allocation */ TBBMALLOC_EXPORT int __TBB_EXPORTED_FUNC scalable_allocation_command(int cmd, void *param); #ifdef __cplusplus } /* extern "C" */ #endif /* __cplusplus */ #ifdef __cplusplus //! The namespace rml contains components of low-level memory pool interface. namespace rml { class MemoryPool; typedef void *(*rawAllocType)(std::intptr_t pool_id, std::size_t &bytes); // returns non-zero in case of error typedef int (*rawFreeType)(std::intptr_t pool_id, void* raw_ptr, std::size_t raw_bytes); struct MemPoolPolicy { enum { TBBMALLOC_POOL_VERSION = 1 }; rawAllocType pAlloc; rawFreeType pFree; // granularity of pAlloc allocations. 0 means default used. std::size_t granularity; int version; // all memory consumed at 1st pAlloc call and never returned, // no more pAlloc calls after 1st unsigned fixedPool : 1, // memory consumed but returned only at pool termination keepAllMemory : 1, reserved : 30; MemPoolPolicy(rawAllocType pAlloc_, rawFreeType pFree_, std::size_t granularity_ = 0, bool fixedPool_ = false, bool keepAllMemory_ = false) : pAlloc(pAlloc_), pFree(pFree_), granularity(granularity_), version(TBBMALLOC_POOL_VERSION), fixedPool(fixedPool_), keepAllMemory(keepAllMemory_), reserved(0) {} }; // enums have same values as appropriate enums from ScalableAllocationResult // TODO: use ScalableAllocationResult in pool_create directly enum MemPoolError { // pool created successfully POOL_OK = TBBMALLOC_OK, // invalid policy parameters found INVALID_POLICY = TBBMALLOC_INVALID_PARAM, // requested pool policy is not supported by allocator library UNSUPPORTED_POLICY = TBBMALLOC_UNSUPPORTED, // lack of memory during pool creation NO_MEMORY = TBBMALLOC_NO_MEMORY, // action takes no effect NO_EFFECT = TBBMALLOC_NO_EFFECT }; TBBMALLOC_EXPORT MemPoolError pool_create_v1(std::intptr_t pool_id, const MemPoolPolicy *policy, rml::MemoryPool **pool); TBBMALLOC_EXPORT bool pool_destroy(MemoryPool* memPool); TBBMALLOC_EXPORT void *pool_malloc(MemoryPool* memPool, 
std::size_t size); TBBMALLOC_EXPORT void *pool_realloc(MemoryPool* memPool, void *object, std::size_t size); TBBMALLOC_EXPORT void *pool_aligned_malloc(MemoryPool* mPool, std::size_t size, std::size_t alignment); TBBMALLOC_EXPORT void *pool_aligned_realloc(MemoryPool* mPool, void *ptr, std::size_t size, std::size_t alignment); TBBMALLOC_EXPORT bool pool_reset(MemoryPool* memPool); TBBMALLOC_EXPORT bool pool_free(MemoryPool *memPool, void *object); TBBMALLOC_EXPORT MemoryPool *pool_identify(void *object); TBBMALLOC_EXPORT std::size_t pool_msize(MemoryPool *memPool, void *object); } // namespace rml namespace tbb { namespace detail { namespace d1 { // keep throw in a separate function to prevent code bloat template void throw_exception(const E &e) { #if TBB_USE_EXCEPTIONS throw e; #else suppress_unused_warning(e); #endif } template class scalable_allocator { public: using value_type = T; using propagate_on_container_move_assignment = std::true_type; //! Always defined for TBB containers using is_always_equal = std::true_type; scalable_allocator() = default; template scalable_allocator(const scalable_allocator&) noexcept {} //! Allocate space for n objects. __TBB_nodiscard T* allocate(std::size_t n) { T* p = static_cast(scalable_malloc(n * sizeof(value_type))); if (!p) { throw_exception(std::bad_alloc()); } return p; } //! Free previously allocated block of memory void deallocate(T* p, std::size_t) { scalable_free(p); } #if TBB_ALLOCATOR_TRAITS_BROKEN using pointer = value_type*; using const_pointer = const value_type*; using reference = value_type&; using const_reference = const value_type&; using difference_type = std::ptrdiff_t; using size_type = std::size_t; template struct rebind { using other = scalable_allocator; }; //! Largest value for which method allocate might succeed. size_type max_size() const noexcept { size_type absolutemax = static_cast(-1) / sizeof (value_type); return (absolutemax > 0 ? absolutemax : 1); } template void construct(U *p, Args&&... 
args) { ::new((void *)p) U(std::forward(args)...); } void destroy(pointer p) { p->~value_type(); } pointer address(reference x) const { return &x; } const_pointer address(const_reference x) const { return &x; } #endif // TBB_ALLOCATOR_TRAITS_BROKEN }; #if TBB_ALLOCATOR_TRAITS_BROKEN template<> class scalable_allocator { public: using pointer = void*; using const_pointer = const void*; using value_type = void; template struct rebind { using other = scalable_allocator; }; }; #endif template inline bool operator==(const scalable_allocator&, const scalable_allocator&) noexcept { return true; } #if !__TBB_CPP20_COMPARISONS_PRESENT template inline bool operator!=(const scalable_allocator&, const scalable_allocator&) noexcept { return false; } #endif #if __TBB_CPP17_MEMORY_RESOURCE_PRESENT //! C++17 memory resource implementation for scalable allocator //! ISO C++ Section 23.12.2 class scalable_resource_impl : public std::pmr::memory_resource { private: void* do_allocate(std::size_t bytes, std::size_t alignment) override { void* p = scalable_aligned_malloc(bytes, alignment); if (!p) { throw_exception(std::bad_alloc()); } return p; } void do_deallocate(void* ptr, std::size_t /*bytes*/, std::size_t /*alignment*/) override { scalable_free(ptr); } //! Memory allocated by one instance of scalable_resource_impl could be deallocated by any //! other instance of this class bool do_is_equal(const std::pmr::memory_resource& other) const noexcept override { return this == &other || #if __TBB_USE_OPTIONAL_RTTI dynamic_cast(&other) != nullptr; #else false; #endif } }; //! 
Global scalable allocator memory resource provider inline std::pmr::memory_resource* scalable_memory_resource() noexcept { static tbb::detail::d1::scalable_resource_impl scalable_res; return &scalable_res; } #endif // __TBB_CPP17_MEMORY_RESOURCE_PRESENT } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::scalable_allocator; #if __TBB_CPP17_MEMORY_RESOURCE_PRESENT using detail::d1::scalable_memory_resource; #endif } // namespace v1 } // namespace tbb #endif /* __cplusplus */ #endif /* __TBB_scalable_allocator_H */ level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/spin_mutex.h000066400000000000000000000066711514453371700311560ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_spin_mutex_H #define __TBB_spin_mutex_H #include "detail/_namespace_injection.h" #include "detail/_mutex_common.h" #include "profiling.h" #include "detail/_assert.h" #include "detail/_utils.h" #include "detail/_scoped_lock.h" #include namespace tbb { namespace detail { namespace d1 { #if __TBB_TSX_INTRINSICS_PRESENT class rtm_mutex; #endif /** A spin_mutex is a low-level synchronization primitive. While locked, it causes the waiting threads to spin in a loop until the lock is released. It should be used only for locking short critical sections (typically less than 20 instructions) when fairness is not an issue. If zero-initialized, the mutex is considered unheld. 
@ingroup synchronization */ class spin_mutex { public: //! Constructors spin_mutex() noexcept : m_flag(false) { create_itt_sync(this, "tbb::spin_mutex", ""); }; //! Destructor ~spin_mutex() = default; //! No Copy spin_mutex(const spin_mutex&) = delete; spin_mutex& operator=(const spin_mutex&) = delete; using scoped_lock = unique_scoped_lock; //! Mutex traits static constexpr bool is_rw_mutex = false; static constexpr bool is_recursive_mutex = false; static constexpr bool is_fair_mutex = false; //! Acquire lock /** Spin if the lock is taken */ void lock() { atomic_backoff backoff; call_itt_notify(prepare, this); while (m_flag.exchange(true)) backoff.pause(); call_itt_notify(acquired, this); } //! Try acquiring lock (non-blocking) /** Return true if lock acquired; false otherwise. */ bool try_lock() { bool result = !m_flag.exchange(true); if (result) { call_itt_notify(acquired, this); } return result; } //! Release lock void unlock() { call_itt_notify(releasing, this); m_flag.store(false, std::memory_order_release); } protected: std::atomic m_flag; }; // class spin_mutex #if TBB_USE_PROFILING_TOOLS inline void set_name(spin_mutex& obj, const char* name) { itt_set_sync_name(&obj, name); } #if (_WIN32||_WIN64) inline void set_name(spin_mutex& obj, const wchar_t* name) { itt_set_sync_name(&obj, name); } #endif //WIN #else inline void set_name(spin_mutex&, const char*) {} #if (_WIN32||_WIN64) inline void set_name(spin_mutex&, const wchar_t*) {} #endif // WIN #endif } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::spin_mutex; } // namespace v1 namespace profiling { using detail::d1::set_name; } } // namespace tbb #include "detail/_rtm_mutex.h" namespace tbb { inline namespace v1 { #if __TBB_TSX_INTRINSICS_PRESENT using speculative_spin_mutex = detail::d1::rtm_mutex; #else using speculative_spin_mutex = detail::d1::spin_mutex; #endif } } #endif /* __TBB_spin_mutex_H */ 
level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/spin_rw_mutex.h000066400000000000000000000172371514453371700316660ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_spin_rw_mutex_H #define __TBB_spin_rw_mutex_H #include "detail/_namespace_injection.h" #include "detail/_mutex_common.h" #include "profiling.h" #include "detail/_assert.h" #include "detail/_utils.h" #include "detail/_scoped_lock.h" #include namespace tbb { namespace detail { namespace d1 { #if __TBB_TSX_INTRINSICS_PRESENT class rtm_rw_mutex; #endif //! Fast, unfair, spinning reader-writer lock with backoff and writer-preference /** @ingroup synchronization */ class spin_rw_mutex { public: //! Constructors spin_rw_mutex() noexcept : m_state(0) { create_itt_sync(this, "tbb::spin_rw_mutex", ""); } //! Destructor ~spin_rw_mutex() { __TBB_ASSERT(!m_state, "destruction of an acquired mutex"); } //! No Copy spin_rw_mutex(const spin_rw_mutex&) = delete; spin_rw_mutex& operator=(const spin_rw_mutex&) = delete; using scoped_lock = rw_scoped_lock; //! Mutex traits static constexpr bool is_rw_mutex = true; static constexpr bool is_recursive_mutex = false; static constexpr bool is_fair_mutex = false; //! 
Acquire lock void lock() { call_itt_notify(prepare, this); for (atomic_backoff backoff; ; backoff.pause()) { state_type s = m_state.load(std::memory_order_relaxed); if (!(s & BUSY)) { // no readers, no writers if (m_state.compare_exchange_strong(s, WRITER)) break; // successfully stored writer flag backoff.reset(); // we could be very close to complete op. } else if (!(s & WRITER_PENDING)) { // no pending writers m_state |= WRITER_PENDING; } } call_itt_notify(acquired, this); } //! Try acquiring lock (non-blocking) /** Return true if lock acquired; false otherwise. */ bool try_lock() { // for a writer: only possible to acquire if no active readers or writers state_type s = m_state.load(std::memory_order_relaxed); if (!(s & BUSY)) { // no readers, no writers; mask is 1..1101 if (m_state.compare_exchange_strong(s, WRITER)) { call_itt_notify(acquired, this); return true; // successfully stored writer flag } } return false; } //! Release lock void unlock() { call_itt_notify(releasing, this); m_state &= READERS; } //! Lock shared ownership mutex void lock_shared() { call_itt_notify(prepare, this); for (atomic_backoff b; ; b.pause()) { state_type s = m_state.load(std::memory_order_relaxed); if (!(s & (WRITER | WRITER_PENDING))) { // no writer or write requests state_type prev_state = m_state.fetch_add(ONE_READER); if (!(prev_state & WRITER)) { break; // successfully stored increased number of readers } // writer got there first, undo the increment m_state -= ONE_READER; } } call_itt_notify(acquired, this); __TBB_ASSERT(m_state & READERS, "invalid state of a read lock: no readers"); } //! 
Try lock shared ownership mutex bool try_lock_shared() { // for a reader: acquire if no active or waiting writers state_type s = m_state.load(std::memory_order_relaxed); if (!(s & (WRITER | WRITER_PENDING))) { // no writers state_type prev_state = m_state.fetch_add(ONE_READER); if (!(prev_state & WRITER)) { // got the lock call_itt_notify(acquired, this); return true; // successfully stored increased number of readers } // writer got there first, undo the increment m_state -= ONE_READER; } return false; } //! Unlock shared ownership mutex void unlock_shared() { __TBB_ASSERT(m_state & READERS, "invalid state of a read lock: no readers"); call_itt_notify(releasing, this); m_state -= ONE_READER; } protected: /** Internal non ISO C++ standard API **/ //! This API is used through the scoped_lock class //! Upgrade reader to become a writer. /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ bool upgrade() { state_type s = m_state.load(std::memory_order_relaxed); __TBB_ASSERT(s & READERS, "invalid state before upgrade: no readers "); // Check and set writer-pending flag. // Required conditions: either no pending writers, or we are the only reader // (with multiple readers and pending writer, another upgrade could have been requested) while ((s & READERS) == ONE_READER || !(s & WRITER_PENDING)) { if (m_state.compare_exchange_strong(s, s | WRITER | WRITER_PENDING)) { atomic_backoff backoff; while ((m_state.load(std::memory_order_relaxed) & READERS) != ONE_READER) backoff.pause(); __TBB_ASSERT((m_state & (WRITER_PENDING|WRITER)) == (WRITER_PENDING | WRITER), "invalid state when upgrading to writer"); // Both new readers and writers are blocked at this time m_state -= (ONE_READER + WRITER_PENDING); return true; // successfully upgraded } } // Slow reacquire unlock_shared(); lock(); return false; } //! 
Downgrade writer to a reader void downgrade() { call_itt_notify(releasing, this); m_state += (ONE_READER - WRITER); __TBB_ASSERT(m_state & READERS, "invalid state after downgrade: no readers"); } using state_type = std::intptr_t; static constexpr state_type WRITER = 1; static constexpr state_type WRITER_PENDING = 2; static constexpr state_type READERS = ~(WRITER | WRITER_PENDING); static constexpr state_type ONE_READER = 4; static constexpr state_type BUSY = WRITER | READERS; friend scoped_lock; //! State of lock /** Bit 0 = writer is holding lock Bit 1 = request by a writer to acquire lock (hint to readers to wait) Bit 2..N = number of readers holding lock */ std::atomic m_state; }; // class spin_rw_mutex #if TBB_USE_PROFILING_TOOLS inline void set_name(spin_rw_mutex& obj, const char* name) { itt_set_sync_name(&obj, name); } #if (_WIN32||_WIN64) inline void set_name(spin_rw_mutex& obj, const wchar_t* name) { itt_set_sync_name(&obj, name); } #endif // WIN #else inline void set_name(spin_rw_mutex&, const char*) {} #if (_WIN32||_WIN64) inline void set_name(spin_rw_mutex&, const wchar_t*) {} #endif // WIN #endif } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::spin_rw_mutex; } // namespace v1 namespace profiling { using detail::d1::set_name; } } // namespace tbb #include "detail/_rtm_rw_mutex.h" namespace tbb { inline namespace v1 { #if __TBB_TSX_INTRINSICS_PRESENT using speculative_spin_rw_mutex = detail::d1::rtm_rw_mutex; #else using speculative_spin_rw_mutex = detail::d1::spin_rw_mutex; #endif } } #endif /* __TBB_spin_rw_mutex_H */ level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/task.h000066400000000000000000000021021514453371700277060ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_task_H #define __TBB_task_H #include "detail/_config.h" #include "detail/_namespace_injection.h" #include "detail/_task.h" namespace tbb { inline namespace v1 { namespace task { #if __TBB_RESUMABLE_TASKS using detail::d1::suspend_point; using detail::d1::resume; using detail::d1::suspend; #endif /* __TBB_RESUMABLE_TASKS */ using detail::d1::current_context; } // namespace task } // namespace v1 } // namespace tbb #endif /* __TBB_task_H */ level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/task_arena.h000066400000000000000000000427361514453371700310750ustar00rootroot00000000000000/* Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #ifndef __TBB_task_arena_H #define __TBB_task_arena_H #include "detail/_config.h" #include "detail/_aligned_space.h" #include "detail/_attach.h" #include "detail/_exception.h" #include "detail/_namespace_injection.h" #include "detail/_small_object_pool.h" #include "detail/_task.h" #include "detail/_task_handle.h" #if __TBB_ARENA_BINDING #include "info.h" #endif /*__TBB_ARENA_BINDING*/ namespace tbb { namespace detail { namespace d1 { template class task_arena_function : public delegate_base { F &my_func; aligned_space my_return_storage; bool my_constructed{false}; // The function should be called only once. bool operator()() const override { new (my_return_storage.begin()) R(my_func()); return true; } public: task_arena_function(F& f) : my_func(f) {} // The function can be called only after operator() and only once. R consume_result() { my_constructed = true; return std::move(*(my_return_storage.begin())); } ~task_arena_function() override { if (my_constructed) { my_return_storage.begin()->~R(); } } }; template class task_arena_function : public delegate_base { F &my_func; bool operator()() const override { my_func(); return true; } public: task_arena_function(F& f) : my_func(f) {} void consume_result() const {} friend class task_arena_base; }; class task_arena_base; class task_scheduler_observer; } // namespace d1 namespace r1 { class arena; struct task_arena_impl; TBB_EXPORT void __TBB_EXPORTED_FUNC observe(d1::task_scheduler_observer&, bool); TBB_EXPORT void __TBB_EXPORTED_FUNC initialize(d1::task_arena_base&); TBB_EXPORT void __TBB_EXPORTED_FUNC terminate(d1::task_arena_base&); TBB_EXPORT bool __TBB_EXPORTED_FUNC attach(d1::task_arena_base&); TBB_EXPORT void __TBB_EXPORTED_FUNC execute(d1::task_arena_base&, d1::delegate_base&); TBB_EXPORT void __TBB_EXPORTED_FUNC wait(d1::task_arena_base&); TBB_EXPORT int __TBB_EXPORTED_FUNC max_concurrency(const d1::task_arena_base*); TBB_EXPORT void __TBB_EXPORTED_FUNC isolate_within_arena(d1::delegate_base& d, 
std::intptr_t); TBB_EXPORT void __TBB_EXPORTED_FUNC enqueue(d1::task&, d1::task_arena_base*); TBB_EXPORT void __TBB_EXPORTED_FUNC enqueue(d1::task&, d1::task_group_context&, d1::task_arena_base*); TBB_EXPORT void __TBB_EXPORTED_FUNC submit(d1::task&, d1::task_group_context&, arena*, std::uintptr_t); } // namespace r1 namespace d2 { inline void enqueue_impl(task_handle&& th, d1::task_arena_base* ta) { __TBB_ASSERT(th != nullptr, "Attempt to schedule empty task_handle"); auto& ctx = task_handle_accessor::ctx_of(th); // Do not access th after release r1::enqueue(*task_handle_accessor::release(th), ctx, ta); } } //namespace d2 namespace d1 { static constexpr int priority_stride = INT_MAX / 4; class task_arena_base { friend struct r1::task_arena_impl; friend void r1::observe(d1::task_scheduler_observer&, bool); public: enum class priority : int { low = 1 * priority_stride, normal = 2 * priority_stride, high = 3 * priority_stride }; #if __TBB_ARENA_BINDING using constraints = tbb::detail::d1::constraints; #endif /*__TBB_ARENA_BINDING*/ protected: //! Special settings intptr_t my_version_and_traits; std::atomic my_initialization_state; //! NULL if not currently initialized. std::atomic my_arena; static_assert(sizeof(std::atomic) == sizeof(r1::arena*), "To preserve backward compatibility we need the equal size of an atomic pointer and a pointer"); //! Concurrency level for deferred initialization int my_max_concurrency; //! Reserved slots for external threads unsigned my_num_reserved_slots; //! Arena priority priority my_priority; //! The NUMA node index to which the arena will be attached numa_node_id my_numa_id; //! The core type index to which arena will be attached core_type_id my_core_type; //! Number of threads per core int my_max_threads_per_core; // Backward compatibility checks. core_type_id core_type() const { return (my_version_and_traits & core_type_support_flag) == core_type_support_flag ? 
my_core_type : automatic; } int max_threads_per_core() const { return (my_version_and_traits & core_type_support_flag) == core_type_support_flag ? my_max_threads_per_core : automatic; } enum { default_flags = 0 , core_type_support_flag = 1 }; task_arena_base(int max_concurrency, unsigned reserved_for_masters, priority a_priority) : my_version_and_traits(default_flags | core_type_support_flag) , my_initialization_state(do_once_state::uninitialized) , my_arena(nullptr) , my_max_concurrency(max_concurrency) , my_num_reserved_slots(reserved_for_masters) , my_priority(a_priority) , my_numa_id(automatic) , my_core_type(automatic) , my_max_threads_per_core(automatic) {} #if __TBB_ARENA_BINDING task_arena_base(const constraints& constraints_, unsigned reserved_for_masters, priority a_priority) : my_version_and_traits(default_flags | core_type_support_flag) , my_initialization_state(do_once_state::uninitialized) , my_arena(nullptr) , my_max_concurrency(constraints_.max_concurrency) , my_num_reserved_slots(reserved_for_masters) , my_priority(a_priority) , my_numa_id(constraints_.numa_id) #if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT , my_core_type(constraints_.core_type) , my_max_threads_per_core(constraints_.max_threads_per_core) #else , my_core_type(automatic) , my_max_threads_per_core(automatic) #endif {} #endif /*__TBB_ARENA_BINDING*/ public: //! Typedef for number of threads that is automatic. 
static const int automatic = -1; static const int not_initialized = -2; }; template R isolate_impl(F& f) { task_arena_function func(f); r1::isolate_within_arena(func, /*isolation*/ 0); return func.consume_result(); } template class enqueue_task : public task { small_object_allocator m_allocator; const F m_func; void finalize(const execution_data& ed) { m_allocator.delete_object(this, ed); } task* execute(execution_data& ed) override { m_func(); finalize(ed); return nullptr; } task* cancel(execution_data&) override { __TBB_ASSERT_RELEASE(false, "Unhandled exception from enqueue task is caught"); return nullptr; } public: enqueue_task(const F& f, small_object_allocator& alloc) : m_allocator(alloc), m_func(f) {} enqueue_task(F&& f, small_object_allocator& alloc) : m_allocator(alloc), m_func(std::move(f)) {} }; template void enqueue_impl(F&& f, task_arena_base* ta) { small_object_allocator alloc{}; r1::enqueue(*alloc.new_object::type>>(std::forward(f), alloc), ta); } /** 1-to-1 proxy representation class of scheduler's arena * Constructors set up settings only, real construction is deferred till the first method invocation * Destructor only removes one of the references to the inner arena representation. * Final destruction happens when all the references (and the work) are gone. */ class task_arena : public task_arena_base { void mark_initialized() { __TBB_ASSERT( my_arena.load(std::memory_order_relaxed), "task_arena initialization is incomplete" ); my_initialization_state.store(do_once_state::initialized, std::memory_order_release); } template R execute_impl(F& f) { initialize(); task_arena_function func(f); r1::execute(*this, func); return func.consume_result(); } public: //! 
Creates task_arena with certain concurrency limits /** Sets up settings only, real construction is deferred till the first method invocation * @arg max_concurrency specifies total number of slots in arena where threads work * @arg reserved_for_masters specifies number of slots to be used by external threads only. * Value of 1 is default and reflects behavior of implicit arenas. **/ task_arena(int max_concurrency_ = automatic, unsigned reserved_for_masters = 1, priority a_priority = priority::normal) : task_arena_base(max_concurrency_, reserved_for_masters, a_priority) {} #if __TBB_ARENA_BINDING //! Creates task arena pinned to certain NUMA node task_arena(const constraints& constraints_, unsigned reserved_for_masters = 1, priority a_priority = priority::normal) : task_arena_base(constraints_, reserved_for_masters, a_priority) {} //! Copies settings from another task_arena task_arena(const task_arena &s) // copy settings but not the reference or instance : task_arena_base( constraints{} .set_numa_id(s.my_numa_id) .set_max_concurrency(s.my_max_concurrency) #if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT .set_core_type(s.my_core_type) .set_max_threads_per_core(s.my_max_threads_per_core) #endif , s.my_num_reserved_slots, s.my_priority) {} #else //! Copies settings from another task_arena task_arena(const task_arena& a) // copy settings but not the reference or instance : task_arena_base(a.my_max_concurrency, a.my_num_reserved_slots, a.my_priority) {} #endif /*__TBB_ARENA_BINDING*/ //! Tag class used to indicate the "attaching" constructor struct attach {}; //! Creates an instance of task_arena attached to the current arena of the thread explicit task_arena( attach ) : task_arena_base(automatic, 1, priority::normal) // use default settings if attach fails { if (r1::attach(*this)) { mark_initialized(); } } //! Creates an instance of task_arena attached to the current arena of the thread explicit task_arena(d1::attach) : task_arena(attach{}) {} //! 
Forces allocation of the resources for the task_arena as specified in constructor arguments void initialize() { atomic_do_once([this]{ r1::initialize(*this); }, my_initialization_state); } //! Overrides concurrency level and forces initialization of internal representation void initialize(int max_concurrency_, unsigned reserved_for_masters = 1, priority a_priority = priority::normal) { __TBB_ASSERT(!my_arena.load(std::memory_order_relaxed), "Impossible to modify settings of an already initialized task_arena"); if( !is_active() ) { my_max_concurrency = max_concurrency_; my_num_reserved_slots = reserved_for_masters; my_priority = a_priority; r1::initialize(*this); mark_initialized(); } } #if __TBB_ARENA_BINDING void initialize(constraints constraints_, unsigned reserved_for_masters = 1, priority a_priority = priority::normal) { __TBB_ASSERT(!my_arena.load(std::memory_order_relaxed), "Impossible to modify settings of an already initialized task_arena"); if( !is_active() ) { my_numa_id = constraints_.numa_id; my_max_concurrency = constraints_.max_concurrency; #if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT my_core_type = constraints_.core_type; my_max_threads_per_core = constraints_.max_threads_per_core; #endif my_num_reserved_slots = reserved_for_masters; my_priority = a_priority; r1::initialize(*this); mark_initialized(); } } #endif /*__TBB_ARENA_BINDING*/ //! Attaches this instance to the current arena of the thread void initialize(attach) { // TODO: decide if this call must be thread-safe __TBB_ASSERT(!my_arena.load(std::memory_order_relaxed), "Impossible to modify settings of an already initialized task_arena"); if( !is_active() ) { if ( !r1::attach(*this) ) { r1::initialize(*this); } mark_initialized(); } } //! Attaches this instance to the current arena of the thread void initialize(d1::attach) { initialize(attach{}); } //! Removes the reference to the internal arena representation. //! Not thread safe wrt concurrent invocations of other methods. 
void terminate() { if( is_active() ) { r1::terminate(*this); my_initialization_state.store(do_once_state::uninitialized, std::memory_order_relaxed); } } //! Removes the reference to the internal arena representation, and destroys the external object. //! Not thread safe wrt concurrent invocations of other methods. ~task_arena() { terminate(); } //! Returns true if the arena is active (initialized); false otherwise. //! The name was chosen to match a task_scheduler_init method with the same semantics. bool is_active() const { return my_initialization_state.load(std::memory_order_acquire) == do_once_state::initialized; } //! Enqueues a task into the arena to process a functor, and immediately returns. //! Does not require the calling thread to join the arena template void enqueue(F&& f) { initialize(); enqueue_impl(std::forward(f), this); } //! Enqueues a task into the arena to process a functor wrapped in task_handle, and immediately returns. //! Does not require the calling thread to join the arena void enqueue(d2::task_handle&& th) { initialize(); d2::enqueue_impl(std::move(th), this); } //! Joins the arena and executes a mutable functor, then returns //! If not possible to join, wraps the functor into a task, enqueues it and waits for task completion //! Can decrement the arena demand for workers, causing a worker to leave and free a slot to the calling thread //! Since C++11, the method returns the value returned by functor (prior to C++11 it returns void). template auto execute(F&& f) -> decltype(f()) { return execute_impl(f); } #if __TBB_EXTRA_DEBUG //! Returns my_num_reserved_slots int debug_reserved_slots() const { // Handle special cases inside the library return my_num_reserved_slots; } //! Returns my_max_concurrency int debug_max_concurrency() const { // Handle special cases inside the library return my_max_concurrency; } //! Wait for all work in the arena to be completed //! Even submitted by other application threads //! 
Joins arena if/when possible (in the same way as execute()) void debug_wait_until_empty() { initialize(); r1::wait(*this); } #endif //__TBB_EXTRA_DEBUG //! Returns the maximal number of threads that can work inside the arena int max_concurrency() const { // Handle special cases inside the library return (my_max_concurrency > 1) ? my_max_concurrency : r1::max_concurrency(this); } friend void submit(task& t, task_arena& ta, task_group_context& ctx, bool as_critical) { __TBB_ASSERT(ta.is_active(), nullptr); call_itt_task_notify(releasing, &t); r1::submit(t, ctx, ta.my_arena.load(std::memory_order_relaxed), as_critical ? 1 : 0); } }; //! Executes a mutable functor in isolation within the current task arena. //! Since C++11, the method returns the value returned by functor (prior to C++11 it returns void). template inline auto isolate(F&& f) -> decltype(f()) { return isolate_impl(f); } //! Returns the index, aka slot number, of the calling thread in its current arena inline int current_thread_index() { slot_id idx = r1::execution_slot(nullptr); return idx == slot_id(-1) ? task_arena_base::not_initialized : int(idx); } #if __TBB_PREVIEW_TASK_GROUP_EXTENSIONS inline bool is_inside_task() { return nullptr != current_context(); } #endif //__TBB_PREVIEW_TASK_GROUP_EXTENSIONS //! 
Returns the maximal number of threads that can work inside the arena inline int max_concurrency() { return r1::max_concurrency(nullptr); } inline void enqueue(d2::task_handle&& th) { d2::enqueue_impl(std::move(th), nullptr); } template inline void enqueue(F&& f) { enqueue_impl(std::forward(f), nullptr); } using r1::submit; } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::task_arena; using detail::d1::attach; #if __TBB_PREVIEW_TASK_GROUP_EXTENSIONS using detail::d1::is_inside_task; #endif namespace this_task_arena { using detail::d1::current_thread_index; using detail::d1::max_concurrency; using detail::d1::isolate; using detail::d1::enqueue; } // namespace this_task_arena } // inline namespace v1 } // namespace tbb #endif /* __TBB_task_arena_H */ level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/task_group.h000066400000000000000000000632661514453371700311440ustar00rootroot00000000000000/* Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #ifndef __TBB_task_group_H #define __TBB_task_group_H #include "detail/_config.h" #include "detail/_namespace_injection.h" #include "detail/_assert.h" #include "detail/_utils.h" #include "detail/_template_helpers.h" #include "detail/_exception.h" #include "detail/_task.h" #include "detail/_small_object_pool.h" #include "detail/_intrusive_list_node.h" #include "detail/_task_handle.h" #include "profiling.h" #include #if _MSC_VER && !defined(__INTEL_COMPILER) // Suppress warning: structure was padded due to alignment specifier #pragma warning(push) #pragma warning(disable:4324) #endif namespace tbb { namespace detail { namespace d1 { class delegate_base; class task_arena_base; class task_group_context; class task_group_base; } namespace r1 { // Forward declarations class tbb_exception_ptr; class market; class thread_data; class task_dispatcher; template class context_guard_helper; struct task_arena_impl; class context_list; TBB_EXPORT void __TBB_EXPORTED_FUNC execute(d1::task_arena_base&, d1::delegate_base&); TBB_EXPORT void __TBB_EXPORTED_FUNC isolate_within_arena(d1::delegate_base&, std::intptr_t); TBB_EXPORT void __TBB_EXPORTED_FUNC initialize(d1::task_group_context&); TBB_EXPORT void __TBB_EXPORTED_FUNC destroy(d1::task_group_context&); TBB_EXPORT void __TBB_EXPORTED_FUNC reset(d1::task_group_context&); TBB_EXPORT bool __TBB_EXPORTED_FUNC cancel_group_execution(d1::task_group_context&); TBB_EXPORT bool __TBB_EXPORTED_FUNC is_group_execution_cancelled(d1::task_group_context&); TBB_EXPORT void __TBB_EXPORTED_FUNC capture_fp_settings(d1::task_group_context&); struct task_group_context_impl; } namespace d2 { namespace { template d1::task* task_ptr_or_nullptr(F&& f); } template class function_task : public task_handle_task { //TODO: apply empty base optimization here const F m_func; private: d1::task* execute(d1::execution_data& ed) override { __TBB_ASSERT(ed.context == &this->ctx(), "The task group context should be used for all tasks"); task* res = 
task_ptr_or_nullptr(m_func); finalize(&ed); return res; } d1::task* cancel(d1::execution_data& ed) override { finalize(&ed); return nullptr; } public: template function_task(FF&& f, d1::wait_context& wo, d1::task_group_context& ctx, d1::small_object_allocator& alloc) : task_handle_task{wo, ctx, alloc}, m_func(std::forward(f)) {} }; #if __TBB_PREVIEW_TASK_GROUP_EXTENSIONS namespace { template d1::task* task_ptr_or_nullptr_impl(std::false_type, F&& f){ task_handle th = std::forward(f)(); return task_handle_accessor::release(th); } template d1::task* task_ptr_or_nullptr_impl(std::true_type, F&& f){ std::forward(f)(); return nullptr; } template d1::task* task_ptr_or_nullptr(F&& f){ using is_void_t = std::is_void< decltype(std::forward(f)()) >; return task_ptr_or_nullptr_impl(is_void_t{}, std::forward(f)); } } #else namespace { template d1::task* task_ptr_or_nullptr(F&& f){ std::forward(f)(); return nullptr; } } // namespace #endif // __TBB_PREVIEW_TASK_GROUP_EXTENSIONS } // namespace d2 namespace d1 { // This structure is left here for backward compatibility check struct context_list_node { std::atomic prev{}; std::atomic next{}; }; //! Used to form groups of tasks /** @ingroup task_scheduling The context services explicit cancellation requests from user code, and unhandled exceptions intercepted during tasks execution. Intercepting an exception results in generating internal cancellation requests (which is processed in exactly the same way as external ones). The context is associated with one or more root tasks and defines the cancellation group that includes all the descendants of the corresponding root task(s). Association is established when a context object is passed as an argument to the task::allocate_root() method. See task_group_context::task_group_context for more details. The context can be bound to another one, and other contexts can be bound to it, forming a tree-like structure: parent -> this -> children. 
Arrows here designate cancellation propagation direction. If a task in a cancellation group is cancelled all the other tasks in this group and groups bound to it (as children) get cancelled too. **/ class task_group_context : no_copy { public: enum traits_type { fp_settings = 1 << 1, concurrent_wait = 1 << 2, default_traits = 0 }; enum kind_type { isolated, bound }; private: //! Space for platform-specific FPU settings. /** Must only be accessed inside TBB binaries, and never directly in user code or inline methods. */ std::uint64_t my_cpu_ctl_env; //! Specifies whether cancellation was requested for this task group. std::atomic my_cancellation_requested; //! Versioning for run-time checks and behavioral traits of the context. enum class task_group_context_version : std::uint8_t { unused = 1 // ensure that new versions, if any, will not clash with previously used ones }; task_group_context_version my_version; //! The context traits. struct context_traits { bool fp_settings : 1; bool concurrent_wait : 1; bool bound : 1; bool reserved1 : 1; bool reserved2 : 1; bool reserved3 : 1; bool reserved4 : 1; bool reserved5 : 1; } my_traits; static_assert(sizeof(context_traits) == 1, "Traits shall fit into one byte."); static constexpr std::uint8_t may_have_children = 1; //! The context internal state (currently only may_have_children). std::atomic my_may_have_children; enum class state : std::uint8_t { created, locked, isolated, bound, dead, proxy = std::uint8_t(-1) //the context is not the real one, but proxy to other one }; //! The synchronization machine state to manage lifetime. std::atomic my_state; union { //! Pointer to the context of the parent cancellation group. NULL for isolated contexts. task_group_context* my_parent; //! Pointer to the actual context 'this' context represents a proxy of. task_group_context* my_actual_context; }; //! Thread data instance that registered this context in its list. 
r1::context_list* my_context_list; static_assert(sizeof(std::atomic) == sizeof(r1::context_list*), "To preserve backward compatibility these types should have the same size"); //! Used to form the thread specific list of contexts without additional memory allocation. /** A context is included into the list of the current thread when its binding to its parent happens. Any context can be present in the list of one thread only. **/ intrusive_list_node my_node; static_assert(sizeof(intrusive_list_node) == sizeof(context_list_node), "To preserve backward compatibility these types should have the same size"); //! Pointer to the container storing exception being propagated across this task group. std::atomic my_exception; static_assert(sizeof(std::atomic) == sizeof(r1::tbb_exception_ptr*), "backward compatibility check"); //! Used to set and maintain stack stitching point for Intel Performance Tools. void* my_itt_caller; //! Description of algorithm for scheduler based instrumentation. string_resource_index my_name; char padding[max_nfs_size - sizeof(std::uint64_t) // my_cpu_ctl_env - sizeof(std::atomic) // my_cancellation_requested - sizeof(std::uint8_t) // my_version - sizeof(context_traits) // my_traits - sizeof(std::atomic) // my_state - sizeof(std::atomic) // my_state - sizeof(task_group_context*) // my_parent - sizeof(r1::context_list*) // my_context_list - sizeof(intrusive_list_node) // my_node - sizeof(std::atomic) // my_exception - sizeof(void*) // my_itt_caller - sizeof(string_resource_index) // my_name ]; task_group_context(context_traits t, string_resource_index name) : my_version{task_group_context_version::unused}, my_name{name} { my_traits = t; // GCC4.8 issues warning list initialization for bitset (missing-field-initializers) r1::initialize(*this); } task_group_context(task_group_context* actual_context) : my_version{task_group_context_version::unused} , my_state{state::proxy} , my_actual_context{actual_context} { __TBB_ASSERT(my_actual_context, "Passed 
pointer value points to nothing."); my_name = actual_context->my_name; // no need to initialize 'this' context as it acts as a proxy for my_actual_context, which // initialization is a user-side responsibility. } static context_traits make_traits(kind_type relation_with_parent, std::uintptr_t user_traits) { context_traits ct; ct.fp_settings = (user_traits & fp_settings) == fp_settings; ct.concurrent_wait = (user_traits & concurrent_wait) == concurrent_wait; ct.bound = relation_with_parent == bound; ct.reserved1 = ct.reserved2 = ct.reserved3 = ct.reserved4 = ct.reserved5 = false; return ct; } bool is_proxy() const { return my_state.load(std::memory_order_relaxed) == state::proxy; } task_group_context& actual_context() noexcept { if (is_proxy()) { __TBB_ASSERT(my_actual_context, "Actual task_group_context is not set."); return *my_actual_context; } return *this; } const task_group_context& actual_context() const noexcept { if (is_proxy()) { __TBB_ASSERT(my_actual_context, "Actual task_group_context is not set."); return *my_actual_context; } return *this; } public: //! Default & binding constructor. /** By default a bound context is created. That is this context will be bound (as child) to the context of the currently executing task . Cancellation requests passed to the parent context are propagated to all the contexts bound to it. Similarly priority change is propagated from the parent context to its children. If task_group_context::isolated is used as the argument, then the tasks associated with this context will never be affected by events in any other context. Creating isolated contexts involve much less overhead, but they have limited utility. Normally when an exception occurs in an algorithm that has nested ones running, it is desirably to have all the nested algorithms cancelled as well. Such a behavior requires nested algorithms to use bound contexts. There is one good place where using isolated algorithms is beneficial. It is an external thread. 
That is if a particular algorithm is invoked directly from the external thread (not from a TBB task), supplying it with explicitly created isolated context will result in a faster algorithm startup. VERSIONING NOTE: Implementation(s) of task_group_context constructor(s) cannot be made entirely out-of-line because the run-time version must be set by the user code. This will become critically important for binary compatibility, if we ever have to change the size of the context object. **/ task_group_context(kind_type relation_with_parent = bound, std::uintptr_t t = default_traits) : task_group_context(make_traits(relation_with_parent, t), CUSTOM_CTX) {} // Custom constructor for instrumentation of oneTBB algorithm task_group_context(string_resource_index name ) : task_group_context(make_traits(bound, default_traits), name) {} // Do not introduce any logic on user side since it might break state propagation assumptions ~task_group_context() { // When 'this' serves as a proxy, the initialization does not happen - nor should the // destruction. if (!is_proxy()) { r1::destroy(*this); } } //! Forcefully reinitializes the context after the task tree it was associated with is completed. /** Because the method assumes that all the tasks that used to be associated with this context have already finished, calling it while the context is still in use somewhere in the task hierarchy leads to undefined behavior. IMPORTANT: This method is not thread safe! The method does not change the context's parent if it is set. **/ void reset() { r1::reset(actual_context()); } //! Initiates cancellation of all tasks in this cancellation group and its subordinate groups. /** \return false if cancellation has already been requested, true otherwise. Note that canceling never fails. When false is returned, it just means that another thread (or this one) has already sent cancellation request to this context or to one of its ancestors (if this context is bound). 
It is guaranteed that when this method is concurrently called on the same not yet cancelled context, true will be returned by one and only one invocation. **/ bool cancel_group_execution() { return r1::cancel_group_execution(actual_context()); } //! Returns true if the context received cancellation request. bool is_group_execution_cancelled() { return r1::is_group_execution_cancelled(actual_context()); } #if __TBB_FP_CONTEXT //! Captures the current FPU control settings to the context. /** Because the method assumes that all the tasks that used to be associated with this context have already finished, calling it while the context is still in use somewhere in the task hierarchy leads to undefined behavior. IMPORTANT: This method is not thread safe! The method does not change the FPU control settings of the context's parent. **/ void capture_fp_settings() { r1::capture_fp_settings(actual_context()); } #endif //! Returns the user visible context trait std::uintptr_t traits() const { std::uintptr_t t{}; const task_group_context& ctx = actual_context(); t |= ctx.my_traits.fp_settings ? fp_settings : 0; t |= ctx.my_traits.concurrent_wait ? concurrent_wait : 0; return t; } private: //// TODO: cleanup friends friend class r1::market; friend class r1::thread_data; friend class r1::task_dispatcher; template friend class r1::context_guard_helper; friend struct r1::task_arena_impl; friend struct r1::task_group_context_impl; friend class task_group_base; }; // class task_group_context static_assert(sizeof(task_group_context) == 128, "Wrong size of task_group_context"); enum task_group_status { not_complete, complete, canceled }; class task_group; class structured_task_group; #if TBB_PREVIEW_ISOLATED_TASK_GROUP class isolated_task_group; #endif template class function_task : public task { const F m_func; wait_context& m_wait_ctx; small_object_allocator m_allocator; void finalize(const execution_data& ed) { // Make a local reference not to access this after destruction. 
wait_context& wo = m_wait_ctx; // Copy allocator to the stack auto allocator = m_allocator; // Destroy user functor before release wait. this->~function_task(); wo.release(); allocator.deallocate(this, ed); } task* execute(execution_data& ed) override { task* res = d2::task_ptr_or_nullptr(m_func); finalize(ed); return res; } task* cancel(execution_data& ed) override { finalize(ed); return nullptr; } public: function_task(const F& f, wait_context& wo, small_object_allocator& alloc) : m_func(f) , m_wait_ctx(wo) , m_allocator(alloc) {} function_task(F&& f, wait_context& wo, small_object_allocator& alloc) : m_func(std::move(f)) , m_wait_ctx(wo) , m_allocator(alloc) {} }; template class function_stack_task : public task { const F& m_func; wait_context& m_wait_ctx; void finalize() { m_wait_ctx.release(); } task* execute(execution_data&) override { task* res = d2::task_ptr_or_nullptr(m_func); finalize(); return res; } task* cancel(execution_data&) override { finalize(); return nullptr; } public: function_stack_task(const F& f, wait_context& wo) : m_func(f), m_wait_ctx(wo) {} }; class task_group_base : no_copy { protected: wait_context m_wait_ctx; task_group_context m_context; template task_group_status internal_run_and_wait(const F& f) { function_stack_task t{ f, m_wait_ctx }; m_wait_ctx.reserve(); bool cancellation_status = false; try_call([&] { execute_and_wait(t, context(), m_wait_ctx, context()); }).on_completion([&] { // TODO: the reset method is not thread-safe. Ensure the correct behavior. cancellation_status = context().is_group_execution_cancelled(); context().reset(); }); return cancellation_status ? 
canceled : complete; } task_group_status internal_run_and_wait(d2::task_handle&& h) { __TBB_ASSERT(h != nullptr, "Attempt to schedule empty task_handle"); using acs = d2::task_handle_accessor; __TBB_ASSERT(&acs::ctx_of(h) == &context(), "Attempt to schedule task_handle into different task_group"); bool cancellation_status = false; try_call([&] { execute_and_wait(*acs::release(h), context(), m_wait_ctx, context()); }).on_completion([&] { // TODO: the reset method is not thread-safe. Ensure the correct behavior. cancellation_status = context().is_group_execution_cancelled(); context().reset(); }); return cancellation_status ? canceled : complete; } template task* prepare_task(F&& f) { m_wait_ctx.reserve(); small_object_allocator alloc{}; return alloc.new_object::type>>(std::forward(f), m_wait_ctx, alloc); } task_group_context& context() noexcept { return m_context.actual_context(); } template d2::task_handle prepare_task_handle(F&& f) { m_wait_ctx.reserve(); small_object_allocator alloc{}; using function_task_t = d2::function_task::type>; d2::task_handle_task* function_task_p = alloc.new_object(std::forward(f), m_wait_ctx, context(), alloc); return d2::task_handle_accessor::construct(function_task_p); } public: task_group_base(uintptr_t traits = 0) : m_wait_ctx(0) , m_context(task_group_context::bound, task_group_context::default_traits | traits) {} task_group_base(task_group_context& ctx) : m_wait_ctx(0) , m_context(&ctx) {} ~task_group_base() noexcept(false) { if (m_wait_ctx.continue_execution()) { #if __TBB_CPP17_UNCAUGHT_EXCEPTIONS_PRESENT bool stack_unwinding_in_progress = std::uncaught_exceptions() > 0; #else bool stack_unwinding_in_progress = std::uncaught_exception(); #endif // Always attempt to do proper cleanup to avoid inevitable memory corruption // in case of missing wait (for the sake of better testability & debuggability) if (!context().is_group_execution_cancelled()) cancel(); d1::wait(m_wait_ctx, context()); if (!stack_unwinding_in_progress) 
throw_exception(exception_id::missing_wait); } } task_group_status wait() { bool cancellation_status = false; try_call([&] { d1::wait(m_wait_ctx, context()); }).on_completion([&] { // TODO: the reset method is not thread-safe. Ensure the correct behavior. cancellation_status = m_context.is_group_execution_cancelled(); context().reset(); }); return cancellation_status ? canceled : complete; } void cancel() { context().cancel_group_execution(); } }; // class task_group_base class task_group : public task_group_base { public: task_group() : task_group_base(task_group_context::concurrent_wait) {} task_group(task_group_context& ctx) : task_group_base(ctx) {} template void run(F&& f) { spawn(*prepare_task(std::forward(f)), context()); } void run(d2::task_handle&& h) { __TBB_ASSERT(h != nullptr, "Attempt to schedule empty task_handle"); using acs = d2::task_handle_accessor; __TBB_ASSERT(&acs::ctx_of(h) == &context(), "Attempt to schedule task_handle into different task_group"); spawn(*acs::release(h), context()); } template d2::task_handle defer(F&& f) { return prepare_task_handle(std::forward(f)); } template task_group_status run_and_wait(const F& f) { return internal_run_and_wait(f); } task_group_status run_and_wait(d2::task_handle&& h) { return internal_run_and_wait(std::move(h)); } }; // class task_group #if TBB_PREVIEW_ISOLATED_TASK_GROUP class spawn_delegate : public delegate_base { task* task_to_spawn; task_group_context& context; bool operator()() const override { spawn(*task_to_spawn, context); return true; } public: spawn_delegate(task* a_task, task_group_context& ctx) : task_to_spawn(a_task), context(ctx) {} }; class wait_delegate : public delegate_base { bool operator()() const override { status = tg.wait(); return true; } protected: task_group& tg; task_group_status& status; public: wait_delegate(task_group& a_group, task_group_status& tgs) : tg(a_group), status(tgs) {} }; template class run_wait_delegate : public wait_delegate { F& func; bool operator()() 
const override { status = tg.run_and_wait(func); return true; } public: run_wait_delegate(task_group& a_group, F& a_func, task_group_status& tgs) : wait_delegate(a_group, tgs), func(a_func) {} }; class isolated_task_group : public task_group { intptr_t this_isolation() { return reinterpret_cast(this); } public: isolated_task_group() : task_group() {} isolated_task_group(task_group_context& ctx) : task_group(ctx) {} template void run(F&& f) { spawn_delegate sd(prepare_task(std::forward(f)), context()); r1::isolate_within_arena(sd, this_isolation()); } void run(d2::task_handle&& h) { __TBB_ASSERT(h != nullptr, "Attempt to schedule empty task_handle"); using acs = d2::task_handle_accessor; __TBB_ASSERT(&acs::ctx_of(h) == &context(), "Attempt to schedule task_handle into different task_group"); spawn_delegate sd(acs::release(h), context()); r1::isolate_within_arena(sd, this_isolation()); } template task_group_status run_and_wait( const F& f ) { task_group_status result = not_complete; run_wait_delegate rwd(*this, f, result); r1::isolate_within_arena(rwd, this_isolation()); __TBB_ASSERT(result != not_complete, "premature exit from wait?"); return result; } task_group_status wait() { task_group_status result = not_complete; wait_delegate wd(*this, result); r1::isolate_within_arena(wd, this_isolation()); __TBB_ASSERT(result != not_complete, "premature exit from wait?"); return result; } }; // class isolated_task_group #endif // TBB_PREVIEW_ISOLATED_TASK_GROUP inline bool is_current_task_group_canceling() { task_group_context* ctx = current_context(); return ctx ? 
ctx->is_group_execution_cancelled() : false; } } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::task_group_context; using detail::d1::task_group; #if TBB_PREVIEW_ISOLATED_TASK_GROUP using detail::d1::isolated_task_group; #endif using detail::d1::task_group_status; using detail::d1::not_complete; using detail::d1::complete; using detail::d1::canceled; using detail::d1::is_current_task_group_canceling; using detail::r1::missing_wait; using detail::d2::task_handle; } } // namespace tbb #if _MSC_VER && !defined(__INTEL_COMPILER) #pragma warning(pop) // 4324 warning #endif #endif // __TBB_task_group_H task_scheduler_observer.h000066400000000000000000000107511514453371700336050ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_task_scheduler_observer_H #define __TBB_task_scheduler_observer_H #include "detail/_namespace_injection.h" #include "task_arena.h" #include namespace tbb { namespace detail { namespace d1 { class task_scheduler_observer; } namespace r1 { class observer_proxy; class observer_list; //! Enable or disable observation /** For local observers the method can be used only when the current thread has the task scheduler initialized or is attached to an arena. Repeated calls with the same state are no-ops. 
**/ TBB_EXPORT void __TBB_EXPORTED_FUNC observe(d1::task_scheduler_observer&, bool state = true); } namespace d1 { class task_scheduler_observer { friend class r1::observer_proxy; friend class r1::observer_list; friend void r1::observe(d1::task_scheduler_observer&, bool); //! Pointer to the proxy holding this observer. /** Observers are proxied by the scheduler to maintain persistent lists of them. **/ std::atomic my_proxy{ nullptr }; //! Counter preventing the observer from being destroyed while in use by the scheduler. /** Valid only when observation is on. **/ std::atomic my_busy_count{ 0 }; //! Contains task_arena pointer task_arena* my_task_arena{ nullptr }; public: //! Returns true if observation is enabled, false otherwise. bool is_observing() const { return my_proxy.load(std::memory_order_relaxed) != nullptr; } //! Entry notification /** Invoked from inside observe(true) call and whenever a worker enters the arena this observer is associated with. If a thread is already in the arena when the observer is activated, the entry notification is called before it executes the first stolen task. **/ virtual void on_scheduler_entry( bool /*is_worker*/ ) {} //! Exit notification /** Invoked from inside observe(false) call and whenever a worker leaves the arena this observer is associated with. **/ virtual void on_scheduler_exit( bool /*is_worker*/ ) {} //! Construct local or global observer in inactive state (observation disabled). /** For a local observer entry/exit notifications are invoked whenever a worker thread joins/leaves the arena of the observer's owner thread. If a thread is already in the arena when the observer is activated, the entry notification is called before it executes the first stolen task. **/ explicit task_scheduler_observer() = default; //! Construct local observer for a given arena in inactive state (observation disabled). /** entry/exit notifications are invoked whenever a thread joins/leaves arena. 
If a thread is already in the arena when the observer is activated, the entry notification is called before it executes the first stolen task. **/ explicit task_scheduler_observer(task_arena& a) : my_task_arena(&a) {} /** Destructor protects instance of the observer from concurrent notification. It is recommended to disable observation before destructor of a derived class starts, otherwise it can lead to concurrent notification callback on partly destroyed object **/ virtual ~task_scheduler_observer() { if (my_proxy.load(std::memory_order_acquire)) { observe(false); } } //! Enable or disable observation /** Warning: concurrent invocations of this method are not safe. Repeated calls with the same state are no-ops. **/ void observe(bool state = true) { if( state && !my_proxy.load(std::memory_order_relaxed) ) { __TBB_ASSERT( my_busy_count.load(std::memory_order_relaxed) == 0, "Inconsistent state of task_scheduler_observer instance"); } r1::observe(*this, state); } }; } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::task_scheduler_observer; } } // namespace tbb #endif /* __TBB_task_scheduler_observer_H */ level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/tbb_allocator.h000066400000000000000000000073061514453371700315660ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #ifndef __TBB_tbb_allocator_H #define __TBB_tbb_allocator_H #include "oneapi/tbb/detail/_utils.h" #include "detail/_namespace_injection.h" #include #include #if __TBB_CPP17_MEMORY_RESOURCE_PRESENT #include #endif namespace tbb { namespace detail { namespace r1 { TBB_EXPORT void* __TBB_EXPORTED_FUNC allocate_memory(std::size_t size); TBB_EXPORT void __TBB_EXPORTED_FUNC deallocate_memory(void* p); TBB_EXPORT bool __TBB_EXPORTED_FUNC is_tbbmalloc_used(); } namespace d1 { template class tbb_allocator { public: using value_type = T; using propagate_on_container_move_assignment = std::true_type; //! Always defined for TBB containers (supported since C++17 for std containers) using is_always_equal = std::true_type; //! Specifies current allocator enum malloc_type { scalable, standard }; tbb_allocator() = default; template tbb_allocator(const tbb_allocator&) noexcept {} //! Allocate space for n objects. __TBB_nodiscard T* allocate(std::size_t n) { return static_cast(r1::allocate_memory(n * sizeof(value_type))); } //! Free previously allocated block of memory. void deallocate(T* p, std::size_t) { r1::deallocate_memory(p); } //! Returns current allocator static malloc_type allocator_type() { return r1::is_tbbmalloc_used() ? standard : scalable; } #if TBB_ALLOCATOR_TRAITS_BROKEN using pointer = value_type*; using const_pointer = const value_type*; using reference = value_type&; using const_reference = const value_type&; using difference_type = std::ptrdiff_t; using size_type = std::size_t; template struct rebind { using other = tbb_allocator; }; //! Largest value for which method allocate might succeed. size_type max_size() const noexcept { size_type max = ~(std::size_t(0)) / sizeof(value_type); return (max > 0 ? max : 1); } template void construct(U *p, Args&&... 
args) { ::new (p) U(std::forward(args)...); } void destroy( pointer p ) { p->~value_type(); } pointer address(reference x) const { return &x; } const_pointer address(const_reference x) const { return &x; } #endif // TBB_ALLOCATOR_TRAITS_BROKEN }; #if TBB_ALLOCATOR_TRAITS_BROKEN template<> class tbb_allocator { public: using pointer = void*; using const_pointer = const void*; using value_type = void; template struct rebind { using other = tbb_allocator; }; }; #endif template inline bool operator==(const tbb_allocator&, const tbb_allocator&) noexcept { return true; } #if !__TBB_CPP20_COMPARISONS_PRESENT template inline bool operator!=(const tbb_allocator&, const tbb_allocator&) noexcept { return false; } #endif } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::tbb_allocator; } // namespace v1 } // namespace tbb #endif /* __TBB_tbb_allocator_H */ level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/tbbmalloc_proxy.h000066400000000000000000000035631514453371700321600ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ /* Replacing the standard memory allocation routines in Microsoft* C/C++ RTL (malloc/free, global new/delete, etc.) with the TBB memory allocator. 
Include the following header to a source of any binary which is loaded during application startup #include "oneapi/tbb/tbbmalloc_proxy.h" or add following parameters to the linker options for the binary which is loaded during application startup. It can be either exe-file or dll. For win32 tbbmalloc_proxy.lib /INCLUDE:"___TBB_malloc_proxy" win64 tbbmalloc_proxy.lib /INCLUDE:"__TBB_malloc_proxy" */ #ifndef __TBB_tbbmalloc_proxy_H #define __TBB_tbbmalloc_proxy_H #if _MSC_VER #ifdef _DEBUG #pragma comment(lib, "tbbmalloc_proxy_debug.lib") #else #pragma comment(lib, "tbbmalloc_proxy.lib") #endif #if defined(_WIN64) #pragma comment(linker, "/include:__TBB_malloc_proxy") #else #pragma comment(linker, "/include:___TBB_malloc_proxy") #endif #else /* Primarily to support MinGW */ extern "C" void __TBB_malloc_proxy(); struct __TBB_malloc_proxy_caller { __TBB_malloc_proxy_caller() { __TBB_malloc_proxy(); } } volatile __TBB_malloc_proxy_helper_object; #endif // _MSC_VER /* Public Windows API */ extern "C" int TBB_malloc_replacement_log(char *** function_replacement_log_ptr); #endif //__TBB_tbbmalloc_proxy_H level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/tick_count.h000066400000000000000000000062011514453371700311120ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #ifndef __TBB_tick_count_H #define __TBB_tick_count_H #include #include "detail/_namespace_injection.h" namespace tbb { namespace detail { namespace d1 { //! Absolute timestamp /** @ingroup timing */ class tick_count { public: using clock_type = typename std::conditional::type; //! Relative time interval. class interval_t : public clock_type::duration { public: //! Construct a time interval representing zero time duration interval_t() : clock_type::duration(clock_type::duration::zero()) {} //! Construct a time interval representing sec seconds time duration explicit interval_t( double sec ) : clock_type::duration(std::chrono::duration_cast(std::chrono::duration(sec))) {} //! Return the length of a time interval in seconds double seconds() const { return std::chrono::duration_cast>(*this).count(); } //! Extract the intervals from the tick_counts and subtract them. friend interval_t operator-( const tick_count& t1, const tick_count& t0 ); //! Add two intervals. friend interval_t operator+( const interval_t& i, const interval_t& j ) { return interval_t(std::chrono::operator+(i, j)); } //! Subtract two intervals. friend interval_t operator-( const interval_t& i, const interval_t& j ) { return interval_t(std::chrono::operator-(i, j)); } private: explicit interval_t( clock_type::duration value_ ) : clock_type::duration(value_) {} }; tick_count() = default; //! Return current time. static tick_count now() { return clock_type::now(); } //! Subtract two timestamps to get the time interval between friend interval_t operator-( const tick_count& t1, const tick_count& t0 ) { return tick_count::interval_t(t1.my_time_point - t0.my_time_point); } //! Return the resolution of the clock in seconds per tick. 
static double resolution() { return static_cast(interval_t::period::num) / interval_t::period::den; } private: clock_type::time_point my_time_point; tick_count( clock_type::time_point tp ) : my_time_point(tp) {} }; } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::tick_count; } // namespace v1 } // namespace tbb #endif /* __TBB_tick_count_H */ level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/oneapi/tbb/version.h000066400000000000000000000076471514453371700304540ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB_version_H #define __TBB_version_H #include "detail/_config.h" #include "detail/_namespace_injection.h" // Product version #define TBB_VERSION_MAJOR 2021 // Update version #define TBB_VERSION_MINOR 6 // "Patch" version for custom releases #define TBB_VERSION_PATCH 0 // Suffix string #define __TBB_VERSION_SUFFIX "" // Full official version string #define TBB_VERSION_STRING __TBB_STRING(TBB_VERSION_MAJOR) "." __TBB_STRING(TBB_VERSION_MINOR) __TBB_VERSION_SUFFIX // OneAPI oneTBB specification version #define ONETBB_SPEC_VERSION "1.0" // Full interface version #define TBB_INTERFACE_VERSION 12060 // Major interface version #define TBB_INTERFACE_VERSION_MAJOR (TBB_INTERFACE_VERSION/1000) // Minor interface version #define TBB_INTERFACE_VERSION_MINOR (TBB_INTERFACE_VERSION%1000/10) // The binary compatibility version // To be used in SONAME, manifests, etc. 
#define __TBB_BINARY_VERSION 12 //! TBB_VERSION support #ifndef ENDL #define ENDL "\n" #endif //TBB_REVAMP_TODO: consider enabling version_string.ver generation //TBB_REVAMP_TODO: #include "version_string.ver" #define __TBB_ONETBB_SPEC_VERSION(N) #N ": SPECIFICATION VERSION\t" ONETBB_SPEC_VERSION ENDL #define __TBB_VERSION_NUMBER(N) #N ": VERSION\t\t" TBB_VERSION_STRING ENDL #define __TBB_INTERFACE_VERSION_NUMBER(N) #N ": INTERFACE VERSION\t" __TBB_STRING(TBB_INTERFACE_VERSION) ENDL #ifndef TBB_USE_DEBUG #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\tundefined" ENDL #elif TBB_USE_DEBUG==0 #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\t0" ENDL #elif TBB_USE_DEBUG==1 #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\t1" ENDL #elif TBB_USE_DEBUG==2 #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\t2" ENDL #else #error Unexpected value for TBB_USE_DEBUG #endif #ifndef TBB_USE_ASSERT #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\tundefined" ENDL #elif TBB_USE_ASSERT==0 #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\t0" ENDL #elif TBB_USE_ASSERT==1 #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\t1" ENDL #elif TBB_USE_ASSERT==2 #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\t2" ENDL #else #error Unexpected value for TBB_USE_ASSERT #endif #define TBB_VERSION_STRINGS_P(N) \ __TBB_ONETBB_SPEC_VERSION(N) \ __TBB_VERSION_NUMBER(N) \ __TBB_INTERFACE_VERSION_NUMBER(N) \ __TBB_VERSION_USE_DEBUG(N) \ __TBB_VERSION_USE_ASSERT(N) #define TBB_VERSION_STRINGS TBB_VERSION_STRINGS_P(oneTBB) #define TBBMALLOC_VERSION_STRINGS TBB_VERSION_STRINGS_P(TBBmalloc) //! The function returns the version string for the Intel(R) oneAPI Threading Building Blocks (oneTBB) //! shared library being used. /** * The returned pointer is an address of a string in the shared library. * It can be different than the TBB_VERSION_STRING obtained at compile time. 
*/ extern "C" TBB_EXPORT const char* __TBB_EXPORTED_FUNC TBB_runtime_version(); //! The function returns the interface version of the oneTBB shared library being used. /** * The returned version is determined at runtime, not at compile/link time. * It can be different than the value of TBB_INTERFACE_VERSION obtained at compile time. */ extern "C" TBB_EXPORT int __TBB_EXPORTED_FUNC TBB_runtime_interface_version(); #endif // __TBB_version_H level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/000077500000000000000000000000001514453371700253255ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/blocked_range.h000066400000000000000000000012171514453371700302560ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/blocked_range.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/blocked_range2d.h000066400000000000000000000012211514453371700304770ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/blocked_range2d.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/blocked_range3d.h000066400000000000000000000012211514453371700305000ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/blocked_range3d.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/blocked_rangeNd.h000066400000000000000000000012211514453371700305330ustar00rootroot00000000000000/* Copyright (c) 2017-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #include "../oneapi/tbb/blocked_rangeNd.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/cache_aligned_allocator.h000066400000000000000000000012311514453371700322610ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/cache_aligned_allocator.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/collaborative_call_once.h000066400000000000000000000012241514453371700323220ustar00rootroot00000000000000/* Copyright (c) 2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/collaborative_call_once.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/combinable.h000066400000000000000000000012141514453371700275670ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/combinable.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/concurrent_hash_map.h000066400000000000000000000012251514453371700315200ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/concurrent_hash_map.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/concurrent_lru_cache.h000066400000000000000000000012261514453371700316660ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #include "../oneapi/tbb/concurrent_lru_cache.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/concurrent_map.h000066400000000000000000000012201514453371700305100ustar00rootroot00000000000000/* Copyright (c) 2019-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/concurrent_map.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/concurrent_priority_queue.h000066400000000000000000000012331514453371700330240ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/concurrent_priority_queue.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/concurrent_queue.h000066400000000000000000000012221514453371700310610ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/concurrent_queue.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/concurrent_set.h000066400000000000000000000012201514453371700305260ustar00rootroot00000000000000/* Copyright (c) 2019-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/concurrent_set.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/concurrent_unordered_map.h000066400000000000000000000012321514453371700325620ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #include "../oneapi/tbb/concurrent_unordered_map.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/concurrent_unordered_set.h000066400000000000000000000012321514453371700326000ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/concurrent_unordered_set.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/concurrent_vector.h000066400000000000000000000012231514453371700312400ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/concurrent_vector.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/enumerable_thread_specific.h000066400000000000000000000012341514453371700330110ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/enumerable_thread_specific.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/flow_graph.h000066400000000000000000000012141514453371700276240ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/flow_graph.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/flow_graph_abstractions.h000066400000000000000000000012311514453371700323770ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #include "../oneapi/tbb/flow_graph_abstractions.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/global_control.h000066400000000000000000000012201514453371700304710ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/global_control.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/info.h000066400000000000000000000012061514453371700264300ustar00rootroot00000000000000/* Copyright (c) 2019-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/info.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/memory_pool.h000066400000000000000000000012151514453371700300360ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/memory_pool.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/null_mutex.h000066400000000000000000000012141514453371700276700ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/null_mutex.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/null_rw_mutex.h000066400000000000000000000012171514453371700304030ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #include "../oneapi/tbb/null_rw_mutex.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/parallel_for.h000066400000000000000000000012161514453371700301400ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/parallel_for.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/parallel_for_each.h000066400000000000000000000012231514453371700311160ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/parallel_for_each.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/parallel_invoke.h000066400000000000000000000012211514453371700306410ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/parallel_invoke.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/parallel_pipeline.h000066400000000000000000000012231514453371700311550ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/parallel_pipeline.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/parallel_reduce.h000066400000000000000000000012211514453371700306150ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #include "../oneapi/tbb/parallel_reduce.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/parallel_scan.h000066400000000000000000000012171514453371700302770ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/parallel_scan.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/parallel_sort.h000066400000000000000000000012171514453371700303420ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/parallel_sort.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/partitioner.h000066400000000000000000000012151514453371700300350ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/partitioner.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/profiling.h000066400000000000000000000012131514453371700274640ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/profiling.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/queuing_mutex.h000066400000000000000000000012171514453371700303760ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #include "../oneapi/tbb/queuing_mutex.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/queuing_rw_mutex.h000066400000000000000000000012221514453371700311020ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/queuing_rw_mutex.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/scalable_allocator.h000066400000000000000000000012241514453371700313030ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/scalable_allocator.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/spin_mutex.h000066400000000000000000000012141514453371700276670ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/spin_mutex.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/spin_rw_mutex.h000066400000000000000000000012171514453371700304020ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/spin_rw_mutex.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/task.h000066400000000000000000000012061514453371700264370ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #include "../oneapi/tbb/task.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/task_arena.h000066400000000000000000000012141514453371700276040ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/task_arena.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/task_group.h000066400000000000000000000012141514453371700276520ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/task_group.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/task_scheduler_observer.h000066400000000000000000000012311514453371700324020ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/task_scheduler_observer.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/tbb.h000066400000000000000000000012011514453371700262370ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/tbb_allocator.h000066400000000000000000000012171514453371700303060ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #include "../oneapi/tbb/tbb_allocator.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/tbbmalloc_proxy.h000066400000000000000000000012211514453371700306720ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/tbbmalloc_proxy.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/tick_count.h000066400000000000000000000012141514453371700276360ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/tick_count.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/include/tbb/version.h000066400000000000000000000012111514453371700271560ustar00rootroot00000000000000/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "../oneapi/tbb/version.h" level-zero-raytracing-support-1.2.3/external/tbb/v2021.6.0/third-party-programs.txt000066400000000000000000001336031514453371700300010ustar00rootroot00000000000000oneAPI Threading Building Blocks (oneTBB) Third Party Programs File This file contains the list of third party software ("third party programs") contained in the Intel software and their required notices and/or license terms. This third party software, even if included with the distribution of the Intel software, may be governed by separate license terms, including without limitation, third party license terms, other Intel software license terms, and open source software license terms. These separate license terms govern your use of the third party programs as set forth in the "third-party-programs.txt" or other similarlynamed text file. The third party programs and their corresponding required notices and/or license terms are listed below. _______________________________________________________________________________________________________ 1. Intel(R) Instrumentation and Tracing Technology (ITT) Copyright (c) 2019 Intel Corporation. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. _______________________________________________________________________________________________________ 2. ActiveState Thread pool with same API as (multi) processing.Pool (Python recipe): Copyright (c) 2008,2016 david decotigny (this file) Copyright (c) 2006-2008, R Oudkerk (multiprocessing.Pool) Portable Hardware Locality (hwloc) Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana University Research and Technology Corporation. All rights reserved. Copyright (c) 2004-2005 The University of Tennessee and The University of Tennessee Research Foundation. All rights reserved. Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, University of Stuttgart. All rights reserved. Copyright (c) 2004-2005 The Regents of the University of California. All rights reserved. 
Copyright (c) 2009 CNRS Copyright (c) 2009-2016 Inria. All rights reserved. Copyright (c) 2009-2015 Universit Bordeaux Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved. Copyright (c) 2010 IBM Copyright (c) 2010 Jirka Hladky Copyright (c) 2012 Aleksej Saushev, The NetBSD Foundation Copyright (c) 2012 Blue Brain Project, EPFL. All rights reserved. Copyright (c) 2013-2014 University of Wisconsin-La Crosse. All rights reserved. Copyright (c) 2015 Research Organization for Information Science and Technology (RIST). All rights reserved. Copyright (c) 2015-2016 Intel, Inc. All rights reserved. BSD 3-clause "New" or "Revised" License Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. _______________________________________________________________________________________________________ 3. gperftools: Copyright (c) 2011, Google Inc. Tachyon: Copyright (c) 1994-2008 John E. Stone. All rights reserved. BSD 3-Clause "New" or "Revised" License Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. _______________________________________________________________________________________________________ 4. Mateusz Kwiatkowski Workaround for bug 62258 in libstdc++ GPL 3.0 with GCC Runtime Library Exception 3.1 GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (c) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU General Public License is a free, copyleft license for software and other kinds of works. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. 
To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others. For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. Finally, every program is threatened constantly by software patents. 
States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. 
An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. 
However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. 
Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. 
b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. 
b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. 
A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. 
But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. 
Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. 
If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. 
Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. 
The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. 
"Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. 
If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. 
If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. 
If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . Also add information on how to contact you by electronic and paper mail. If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode: Copyright (C) This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. 
The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an "about box". You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see . The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read . GCC RUNTIME LIBRARY EXCEPTION Version 3.1, 31 March 2009 Copyright (c) 2009 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. This GCC Runtime Library Exception ("Exception") is an additional permission under section 7 of the GNU General Public License, version 3 ("GPLv3"). It applies to a given file (the "Runtime Library") that bears a notice placed by the copyright holder of the file stating that the file is governed by GPLv3 along with this Exception. When you use GCC to compile a program, GCC may combine portions of certain GCC header files and runtime libraries with the compiled program. The purpose of this Exception is to allow compilation of non-GPL (including proprietary) programs to use, in this way, the header files and runtime libraries covered by this Exception. 0. Definitions. A file is an "Independent Module" if it either requires the Runtime Library for execution after a Compilation Process, or makes use of an interface provided by the Runtime Library, but is not otherwise based on the Runtime Library. 
"GCC" means a version of the GNU Compiler Collection, with or without modifications, governed by version 3 (or a specified later version) of the GNU General Public License (GPL) with the option of using any subsequent versions published by the FSF. "GPL-compatible Software" is software whose conditions of propagation, modification and use would permit combination with GCC in accord with the license of GCC. "Target Code" refers to output from any compiler for a real or virtual target processor architecture, in executable form or suitable for input to an assembler, loader, linker and/or execution phase. Notwithstanding that, Target Code does not include data in any format that is used as a compiler intermediate representation, or used for producing a compiler intermediate representation. The "Compilation Process" transforms code entirely represented in non-intermediate languages designed for human-written code, and/or in Java Virtual Machine byte code, into Target Code. Thus, for example, use of source code generators and preprocessors need not be considered part of the Compilation Process, since the Compilation Process can be understood as starting with the output of the generators or preprocessors. A Compilation Process is "Eligible" if it is done using GCC, alone or with other GPL-compatible software, or if it is done without using any work based on GCC. For example, using non-GPL-compatible Software to optimize any GCC intermediate representations would not qualify as an Eligible Compilation Process. 1. Grant of Additional Permission. You have permission to propagate a work of Target Code formed by combining the Runtime Library with Independent Modules, even if such propagation would otherwise violate the terms of GPLv3, provided that all Target Code was generated by Eligible Compilation Processes. You may then convey such a combination under terms of your choice, consistent with the licensing of the Independent Modules. 2. No Weakening of GCC Copyleft. 
The availability of this Exception does not imply any general presumption that third-party software is unaffected by the copyleft requirements of the license of GCC. _______________________________________________________________________________________________________ 5. Doctest Copyright (c) 2016-2019 Viktor Kirilov The MIT License (MIT) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. _______________________________________________________________________________________________________ *Other names and brands may be claimed as the property of others. 
level-zero-raytracing-support-1.2.3/level_zero/000077500000000000000000000000001514453371700216345ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/level_zero/CMakeLists.txt000066400000000000000000000017721514453371700244030ustar00rootroot00000000000000## Copyright 2009-2021 Intel Corporation ## SPDX-License-Identifier: Apache-2.0 SET(ZE_LOADER_RUNTIME_LINK_NAME_LINUX "libze_loader.so.1" CACHE STRING "Name of the ze_loader lib that is looked for at runtime on Linux") SET(ZE_LOADER_RUNTIME_LINK_NAME_WINDOWS "ze_loader.dll" CACHE STRING "Name of the ze_loader lib that is looked for at runtime on Windows") MARK_AS_ADVANCED(ZE_LOADER_RUNTIME_LINK_NAME_LINUX) MARK_AS_ADVANCED(ZE_LOADER_RUNTIME_LINK_NAME_WINDOWS) ADD_LIBRARY(ze_wrapper STATIC ze_wrapper.cpp) IF (NOT ZE_RAYTRACING_SYCL_TESTS STREQUAL "LEVEL_ZERO_RTAS_BUILDER") TARGET_LINK_LIBRARIES(ze_wrapper PUBLIC embree_rthwif) ELSE() ADD_COMPILE_DEFINITIONS(ZE_RAYTRACING_DISABLE_INTERNAL_BUILDER) ENDIF() TARGET_INCLUDE_DIRECTORIES(ze_wrapper PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/..) 
TARGET_COMPILE_DEFINITIONS(ze_wrapper PRIVATE ZE_LOADER_NAME_LINUX="${ZE_LOADER_RUNTIME_LINK_NAME_LINUX}") TARGET_COMPILE_DEFINITIONS(ze_wrapper PRIVATE ZE_LOADER_NAME_WINDOWS="${ZE_LOADER_RUNTIME_LINK_NAME_WINDOWS}") level-zero-raytracing-support-1.2.3/level_zero/ze_api.h000066400000000000000000033437071514453371700232750ustar00rootroot00000000000000/* * * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * * @file ze_api.h * @version v1.13-r1.13.1 * */ #ifndef _ZE_API_H #define _ZE_API_H #if defined(__cplusplus) #pragma once #endif // standard headers #include #include #if defined(__cplusplus) extern "C" { #endif // Intel 'oneAPI' Level-Zero API common types #if !defined(__GNUC__) #pragma region common #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_MAKE_VERSION /// @brief Generates generic 'oneAPI' API versions #define ZE_MAKE_VERSION( _major, _minor ) (( _major << 16 )|( _minor & 0x0000ffff)) #endif // ZE_MAKE_VERSION /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_MAJOR_VERSION /// @brief Extracts 'oneAPI' API major version #define ZE_MAJOR_VERSION( _ver ) ( _ver >> 16 ) #endif // ZE_MAJOR_VERSION /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_MINOR_VERSION /// @brief Extracts 'oneAPI' API minor version #define ZE_MINOR_VERSION( _ver ) ( _ver & 0x0000ffff ) #endif // ZE_MINOR_VERSION /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_APICALL #if defined(_WIN32) /// @brief Calling convention for all API functions #define ZE_APICALL __cdecl #else #define ZE_APICALL #endif // defined(_WIN32) #endif // ZE_APICALL /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_APIEXPORT #if defined(_WIN32) /// @brief Microsoft-specific dllexport storage-class attribute #define ZE_APIEXPORT __declspec(dllexport) #endif // defined(_WIN32) 
#endif // ZE_APIEXPORT /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_APIEXPORT #if __GNUC__ >= 4 /// @brief GCC-specific dllexport storage-class attribute #define ZE_APIEXPORT __attribute__ ((visibility ("default"))) #else #define ZE_APIEXPORT #endif // __GNUC__ >= 4 #endif // ZE_APIEXPORT /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_DLLEXPORT #if defined(_WIN32) /// @brief Microsoft-specific dllexport storage-class attribute #define ZE_DLLEXPORT __declspec(dllexport) #endif // defined(_WIN32) #endif // ZE_DLLEXPORT /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_DLLEXPORT #if __GNUC__ >= 4 /// @brief GCC-specific dllexport storage-class attribute #define ZE_DLLEXPORT __attribute__ ((visibility ("default"))) #else #define ZE_DLLEXPORT #endif // __GNUC__ >= 4 #endif // ZE_DLLEXPORT /////////////////////////////////////////////////////////////////////////////// /// @brief compiler-independent type typedef uint8_t ze_bool_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Handle of a driver instance typedef struct _ze_driver_handle_t *ze_driver_handle_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Handle of driver's device object typedef struct _ze_device_handle_t *ze_device_handle_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Handle of driver's context object typedef struct _ze_context_handle_t *ze_context_handle_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Handle of driver's command queue object typedef struct _ze_command_queue_handle_t *ze_command_queue_handle_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Handle of driver's command list object typedef struct _ze_command_list_handle_t 
*ze_command_list_handle_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Handle of driver's fence object typedef struct _ze_fence_handle_t *ze_fence_handle_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Handle of driver's event pool object typedef struct _ze_event_pool_handle_t *ze_event_pool_handle_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Handle of driver's event object typedef struct _ze_event_handle_t *ze_event_handle_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Handle of driver's image object typedef struct _ze_image_handle_t *ze_image_handle_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Handle of driver's module object typedef struct _ze_module_handle_t *ze_module_handle_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Handle of module's build log object typedef struct _ze_module_build_log_handle_t *ze_module_build_log_handle_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Handle of driver's kernel object typedef struct _ze_kernel_handle_t *ze_kernel_handle_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Handle of driver's sampler object typedef struct _ze_sampler_handle_t *ze_sampler_handle_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Handle of physical memory object typedef struct _ze_physical_mem_handle_t *ze_physical_mem_handle_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Handle of driver's fabric vertex object typedef struct _ze_fabric_vertex_handle_t *ze_fabric_vertex_handle_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Handle of driver's 
fabric edge object typedef struct _ze_fabric_edge_handle_t *ze_fabric_edge_handle_t; /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_MAX_IPC_HANDLE_SIZE /// @brief Maximum IPC handle size #define ZE_MAX_IPC_HANDLE_SIZE 64 #endif // ZE_MAX_IPC_HANDLE_SIZE /////////////////////////////////////////////////////////////////////////////// /// @brief IPC handle to a memory allocation typedef struct _ze_ipc_mem_handle_t { char data[ZE_MAX_IPC_HANDLE_SIZE]; ///< [out] Opaque data representing an IPC handle } ze_ipc_mem_handle_t; /////////////////////////////////////////////////////////////////////////////// /// @brief IPC handle to a event pool allocation typedef struct _ze_ipc_event_pool_handle_t { char data[ZE_MAX_IPC_HANDLE_SIZE]; ///< [out] Opaque data representing an IPC handle } ze_ipc_event_pool_handle_t; /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_BIT /// @brief Generic macro for enumerator bit masks #define ZE_BIT( _i ) ( 1 << _i ) #endif // ZE_BIT /////////////////////////////////////////////////////////////////////////////// /// @brief Defines Return/Error codes typedef enum _ze_result_t { ZE_RESULT_SUCCESS = 0, ///< [Core] success ZE_RESULT_NOT_READY = 1, ///< [Core] synchronization primitive not signaled ZE_RESULT_ERROR_DEVICE_LOST = 0x70000001, ///< [Core] device hung, reset, was removed, or driver update occurred ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY = 0x70000002, ///< [Core] insufficient host memory to satisfy call ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY = 0x70000003, ///< [Core] insufficient device memory to satisfy call ZE_RESULT_ERROR_MODULE_BUILD_FAILURE = 0x70000004, ///< [Core] error occurred when building module, see build log for details ZE_RESULT_ERROR_MODULE_LINK_FAILURE = 0x70000005, ///< [Core] error occurred when linking modules, see build log for details ZE_RESULT_ERROR_DEVICE_REQUIRES_RESET = 0x70000006, ///< [Core] device requires a reset 
ZE_RESULT_ERROR_DEVICE_IN_LOW_POWER_STATE = 0x70000007, ///< [Core] device currently in low power state ZE_RESULT_EXP_ERROR_DEVICE_IS_NOT_VERTEX = 0x7ff00001, ///< [Core, Experimental] device is not represented by a fabric vertex ZE_RESULT_EXP_ERROR_VERTEX_IS_NOT_DEVICE = 0x7ff00002, ///< [Core, Experimental] fabric vertex does not represent a device ZE_RESULT_EXP_ERROR_REMOTE_DEVICE = 0x7ff00003, ///< [Core, Experimental] fabric vertex represents a remote device or ///< subdevice ZE_RESULT_EXP_ERROR_OPERANDS_INCOMPATIBLE = 0x7ff00004, ///< [Core, Experimental] operands of comparison are not compatible ZE_RESULT_EXP_RTAS_BUILD_RETRY = 0x7ff00005, ///< [Core, Experimental] ray tracing acceleration structure build ///< operation failed due to insufficient resources, retry with a larger ///< acceleration structure buffer allocation ZE_RESULT_EXP_RTAS_BUILD_DEFERRED = 0x7ff00006, ///< [Core, Experimental] ray tracing acceleration structure build ///< operation deferred to parallel operation join ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS = 0x70010000, ///< [Sysman] access denied due to permission level ZE_RESULT_ERROR_NOT_AVAILABLE = 0x70010001, ///< [Sysman] resource already in use and simultaneous access not allowed ///< or resource was removed ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE = 0x70020000, ///< [Common] external required dependency is unavailable or missing ZE_RESULT_WARNING_DROPPED_DATA = 0x70020001, ///< [Tools] data may have been dropped ZE_RESULT_ERROR_UNINITIALIZED = 0x78000001, ///< [Validation] driver is not initialized ZE_RESULT_ERROR_UNSUPPORTED_VERSION = 0x78000002, ///< [Validation] generic error code for unsupported versions ZE_RESULT_ERROR_UNSUPPORTED_FEATURE = 0x78000003, ///< [Validation] generic error code for unsupported features ZE_RESULT_ERROR_INVALID_ARGUMENT = 0x78000004, ///< [Validation] generic error code for invalid arguments ZE_RESULT_ERROR_INVALID_NULL_HANDLE = 0x78000005, ///< [Validation] handle argument is not valid 
ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE = 0x78000006, ///< [Validation] object pointed to by handle still in-use by device ZE_RESULT_ERROR_INVALID_NULL_POINTER = 0x78000007, ///< [Validation] pointer argument may not be nullptr ZE_RESULT_ERROR_INVALID_SIZE = 0x78000008, ///< [Validation] size argument is invalid (e.g., must not be zero) ZE_RESULT_ERROR_UNSUPPORTED_SIZE = 0x78000009, ///< [Validation] size argument is not supported by the device (e.g., too ///< large) ZE_RESULT_ERROR_UNSUPPORTED_ALIGNMENT = 0x7800000a, ///< [Validation] alignment argument is not supported by the device (e.g., ///< too small) ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT = 0x7800000b, ///< [Validation] synchronization object in invalid state ZE_RESULT_ERROR_INVALID_ENUMERATION = 0x7800000c, ///< [Validation] enumerator argument is not valid ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION = 0x7800000d, ///< [Validation] enumerator argument is not supported by the device ZE_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT = 0x7800000e, ///< [Validation] image format is not supported by the device ZE_RESULT_ERROR_INVALID_NATIVE_BINARY = 0x7800000f, ///< [Validation] native binary is not supported by the device ZE_RESULT_ERROR_INVALID_GLOBAL_NAME = 0x78000010, ///< [Validation] global variable is not found in the module ZE_RESULT_ERROR_INVALID_KERNEL_NAME = 0x78000011, ///< [Validation] kernel name is not found in the module ZE_RESULT_ERROR_INVALID_FUNCTION_NAME = 0x78000012, ///< [Validation] function name is not found in the module ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION = 0x78000013, ///< [Validation] group size dimension is not valid for the kernel or ///< device ZE_RESULT_ERROR_INVALID_GLOBAL_WIDTH_DIMENSION = 0x78000014, ///< [Validation] global width dimension is not valid for the kernel or ///< device ZE_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX = 0x78000015, ///< [Validation] kernel argument index is not valid for kernel ZE_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE = 0x78000016, ///< 
[Validation] kernel argument size does not match kernel ZE_RESULT_ERROR_INVALID_KERNEL_ATTRIBUTE_VALUE = 0x78000017, ///< [Validation] value of kernel attribute is not valid for the kernel or ///< device ZE_RESULT_ERROR_INVALID_MODULE_UNLINKED = 0x78000018, ///< [Validation] module with imports needs to be linked before kernels can ///< be created from it. ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE = 0x78000019, ///< [Validation] command list type does not match command queue type ZE_RESULT_ERROR_OVERLAPPING_REGIONS = 0x7800001a, ///< [Validation] copy operations do not support overlapping regions of ///< memory ZE_RESULT_WARNING_ACTION_REQUIRED = 0x7800001b, ///< [Sysman] an action is required to complete the desired operation ZE_RESULT_ERROR_INVALID_KERNEL_HANDLE = 0x7800001c, ///< [Core, Validation] kernel handle is invalid for the operation ZE_RESULT_EXT_RTAS_BUILD_RETRY = 0x7800001d, ///< [Core, Extension] ray tracing acceleration structure build operation ///< failed due to insufficient resources, retry with a larger acceleration ///< structure buffer allocation ZE_RESULT_EXT_RTAS_BUILD_DEFERRED = 0x7800001e, ///< [Core, Extension] ray tracing acceleration structure build operation ///< deferred to parallel operation join ZE_RESULT_EXT_ERROR_OPERANDS_INCOMPATIBLE = 0x7800001f, ///< [Core, Extension] operands of comparison are not compatible ZE_RESULT_ERROR_SURVIVABILITY_MODE_DETECTED = 0x78000020, ///< [Sysman] device is in survivability mode, firmware update needed ZE_RESULT_ERROR_UNKNOWN = 0x7ffffffe, ///< [Core] unknown or internal error ZE_RESULT_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_RESULT_* ENUMs } ze_result_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Defines structure types typedef enum _ze_structure_type_t { ZE_STRUCTURE_TYPE_DRIVER_PROPERTIES = 0x1, ///< ::ze_driver_properties_t ZE_STRUCTURE_TYPE_DRIVER_IPC_PROPERTIES = 0x2, ///< ::ze_driver_ipc_properties_t 
ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES = 0x3, ///< ::ze_device_properties_t ZE_STRUCTURE_TYPE_DEVICE_COMPUTE_PROPERTIES = 0x4, ///< ::ze_device_compute_properties_t ZE_STRUCTURE_TYPE_DEVICE_MODULE_PROPERTIES = 0x5, ///< ::ze_device_module_properties_t ZE_STRUCTURE_TYPE_COMMAND_QUEUE_GROUP_PROPERTIES = 0x6, ///< ::ze_command_queue_group_properties_t ZE_STRUCTURE_TYPE_DEVICE_MEMORY_PROPERTIES = 0x7, ///< ::ze_device_memory_properties_t ZE_STRUCTURE_TYPE_DEVICE_MEMORY_ACCESS_PROPERTIES = 0x8, ///< ::ze_device_memory_access_properties_t ZE_STRUCTURE_TYPE_DEVICE_CACHE_PROPERTIES = 0x9, ///< ::ze_device_cache_properties_t ZE_STRUCTURE_TYPE_DEVICE_IMAGE_PROPERTIES = 0xa, ///< ::ze_device_image_properties_t ZE_STRUCTURE_TYPE_DEVICE_P2P_PROPERTIES = 0xb, ///< ::ze_device_p2p_properties_t ZE_STRUCTURE_TYPE_DEVICE_EXTERNAL_MEMORY_PROPERTIES = 0xc, ///< ::ze_device_external_memory_properties_t ZE_STRUCTURE_TYPE_CONTEXT_DESC = 0xd, ///< ::ze_context_desc_t ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC = 0xe, ///< ::ze_command_queue_desc_t ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC = 0xf, ///< ::ze_command_list_desc_t ZE_STRUCTURE_TYPE_EVENT_POOL_DESC = 0x10, ///< ::ze_event_pool_desc_t ZE_STRUCTURE_TYPE_EVENT_DESC = 0x11, ///< ::ze_event_desc_t ZE_STRUCTURE_TYPE_FENCE_DESC = 0x12, ///< ::ze_fence_desc_t ZE_STRUCTURE_TYPE_IMAGE_DESC = 0x13, ///< ::ze_image_desc_t ZE_STRUCTURE_TYPE_IMAGE_PROPERTIES = 0x14, ///< ::ze_image_properties_t ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC = 0x15, ///< ::ze_device_mem_alloc_desc_t ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC = 0x16, ///< ::ze_host_mem_alloc_desc_t ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES = 0x17, ///< ::ze_memory_allocation_properties_t ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_DESC = 0x18, ///< ::ze_external_memory_export_desc_t ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_FD = 0x19, ///< ::ze_external_memory_import_fd_t ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_FD = 0x1a, ///< ::ze_external_memory_export_fd_t ZE_STRUCTURE_TYPE_MODULE_DESC = 0x1b, ///< 
::ze_module_desc_t ZE_STRUCTURE_TYPE_MODULE_PROPERTIES = 0x1c, ///< ::ze_module_properties_t ZE_STRUCTURE_TYPE_KERNEL_DESC = 0x1d, ///< ::ze_kernel_desc_t ZE_STRUCTURE_TYPE_KERNEL_PROPERTIES = 0x1e, ///< ::ze_kernel_properties_t ZE_STRUCTURE_TYPE_SAMPLER_DESC = 0x1f, ///< ::ze_sampler_desc_t ZE_STRUCTURE_TYPE_PHYSICAL_MEM_DESC = 0x20, ///< ::ze_physical_mem_desc_t ZE_STRUCTURE_TYPE_KERNEL_PREFERRED_GROUP_SIZE_PROPERTIES = 0x21, ///< ::ze_kernel_preferred_group_size_properties_t ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_WIN32 = 0x22, ///< ::ze_external_memory_import_win32_handle_t ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_WIN32 = 0x23, ///< ::ze_external_memory_export_win32_handle_t ZE_STRUCTURE_TYPE_DEVICE_RAYTRACING_EXT_PROPERTIES = 0x00010001, ///< ::ze_device_raytracing_ext_properties_t ZE_STRUCTURE_TYPE_RAYTRACING_MEM_ALLOC_EXT_DESC = 0x10002, ///< ::ze_raytracing_mem_alloc_ext_desc_t ZE_STRUCTURE_TYPE_FLOAT_ATOMIC_EXT_PROPERTIES = 0x10003, ///< ::ze_float_atomic_ext_properties_t ZE_STRUCTURE_TYPE_CACHE_RESERVATION_EXT_DESC = 0x10004, ///< ::ze_cache_reservation_ext_desc_t ZE_STRUCTURE_TYPE_EU_COUNT_EXT = 0x10005, ///< ::ze_eu_count_ext_t ZE_STRUCTURE_TYPE_SRGB_EXT_DESC = 0x10006, ///< ::ze_srgb_ext_desc_t ZE_STRUCTURE_TYPE_LINKAGE_INSPECTION_EXT_DESC = 0x10007, ///< ::ze_linkage_inspection_ext_desc_t ZE_STRUCTURE_TYPE_PCI_EXT_PROPERTIES = 0x10008, ///< ::ze_pci_ext_properties_t ZE_STRUCTURE_TYPE_DRIVER_MEMORY_FREE_EXT_PROPERTIES = 0x10009, ///< ::ze_driver_memory_free_ext_properties_t ZE_STRUCTURE_TYPE_MEMORY_FREE_EXT_DESC = 0x1000a, ///< ::ze_memory_free_ext_desc_t ZE_STRUCTURE_TYPE_MEMORY_COMPRESSION_HINTS_EXT_DESC = 0x1000b, ///< ::ze_memory_compression_hints_ext_desc_t ZE_STRUCTURE_TYPE_IMAGE_ALLOCATION_EXT_PROPERTIES = 0x1000c, ///< ::ze_image_allocation_ext_properties_t ZE_STRUCTURE_TYPE_DEVICE_LUID_EXT_PROPERTIES = 0x1000d, ///< ::ze_device_luid_ext_properties_t ZE_STRUCTURE_TYPE_DEVICE_MEMORY_EXT_PROPERTIES = 0x1000e, ///< 
::ze_device_memory_ext_properties_t ZE_STRUCTURE_TYPE_DEVICE_IP_VERSION_EXT = 0x1000f, ///< ::ze_device_ip_version_ext_t ZE_STRUCTURE_TYPE_IMAGE_VIEW_PLANAR_EXT_DESC = 0x10010, ///< ::ze_image_view_planar_ext_desc_t ZE_STRUCTURE_TYPE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_PROPERTIES = 0x10011, ///< ::ze_event_query_kernel_timestamps_ext_properties_t ZE_STRUCTURE_TYPE_EVENT_QUERY_KERNEL_TIMESTAMPS_RESULTS_EXT_PROPERTIES = 0x10012, ///< ::ze_event_query_kernel_timestamps_results_ext_properties_t ZE_STRUCTURE_TYPE_KERNEL_MAX_GROUP_SIZE_EXT_PROPERTIES = 0x10013, ///< ::ze_kernel_max_group_size_ext_properties_t ZE_STRUCTURE_TYPE_RELAXED_ALLOCATION_LIMITS_EXP_DESC = 0x00020001, ///< ::ze_relaxed_allocation_limits_exp_desc_t ZE_STRUCTURE_TYPE_MODULE_PROGRAM_EXP_DESC = 0x00020002, ///< ::ze_module_program_exp_desc_t ZE_STRUCTURE_TYPE_SCHEDULING_HINT_EXP_PROPERTIES = 0x00020003, ///< ::ze_scheduling_hint_exp_properties_t ZE_STRUCTURE_TYPE_SCHEDULING_HINT_EXP_DESC = 0x00020004, ///< ::ze_scheduling_hint_exp_desc_t ZE_STRUCTURE_TYPE_IMAGE_VIEW_PLANAR_EXP_DESC = 0x00020005, ///< ::ze_image_view_planar_exp_desc_t ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES_1_2 = 0x00020006, ///< ::ze_device_properties_t ZE_STRUCTURE_TYPE_IMAGE_MEMORY_EXP_PROPERTIES = 0x00020007, ///< ::ze_image_memory_properties_exp_t ZE_STRUCTURE_TYPE_POWER_SAVING_HINT_EXP_DESC = 0x00020008, ///< ::ze_context_power_saving_hint_exp_desc_t ZE_STRUCTURE_TYPE_COPY_BANDWIDTH_EXP_PROPERTIES = 0x00020009, ///< ::ze_copy_bandwidth_exp_properties_t ZE_STRUCTURE_TYPE_DEVICE_P2P_BANDWIDTH_EXP_PROPERTIES = 0x0002000A, ///< ::ze_device_p2p_bandwidth_exp_properties_t ZE_STRUCTURE_TYPE_FABRIC_VERTEX_EXP_PROPERTIES = 0x0002000B, ///< ::ze_fabric_vertex_exp_properties_t ZE_STRUCTURE_TYPE_FABRIC_EDGE_EXP_PROPERTIES = 0x0002000C, ///< ::ze_fabric_edge_exp_properties_t ZE_STRUCTURE_TYPE_MEMORY_SUB_ALLOCATIONS_EXP_PROPERTIES = 0x0002000D, ///< ::ze_memory_sub_allocations_exp_properties_t ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_DESC = 0x0002000E, 
///< ::ze_rtas_builder_exp_desc_t ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_EXP_DESC = 0x0002000F, ///< ::ze_rtas_builder_build_op_exp_desc_t ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_PROPERTIES = 0x00020010, ///< ::ze_rtas_builder_exp_properties_t ZE_STRUCTURE_TYPE_RTAS_PARALLEL_OPERATION_EXP_PROPERTIES = 0x00020011, ///< ::ze_rtas_parallel_operation_exp_properties_t ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXP_PROPERTIES = 0x00020012, ///< ::ze_rtas_device_exp_properties_t ZE_STRUCTURE_TYPE_RTAS_GEOMETRY_AABBS_EXP_CB_PARAMS = 0x00020013, ///< ::ze_rtas_geometry_aabbs_exp_cb_params_t ZE_STRUCTURE_TYPE_COUNTER_BASED_EVENT_POOL_EXP_DESC = 0x00020014, ///< ::ze_event_pool_counter_based_exp_desc_t ZE_STRUCTURE_TYPE_MUTABLE_COMMAND_LIST_EXP_PROPERTIES = 0x00020015, ///< ::ze_mutable_command_list_exp_properties_t ZE_STRUCTURE_TYPE_MUTABLE_COMMAND_LIST_EXP_DESC = 0x00020016, ///< ::ze_mutable_command_list_exp_desc_t ZE_STRUCTURE_TYPE_MUTABLE_COMMAND_ID_EXP_DESC = 0x00020017, ///< ::ze_mutable_command_id_exp_desc_t ZE_STRUCTURE_TYPE_MUTABLE_COMMANDS_EXP_DESC = 0x00020018, ///< ::ze_mutable_commands_exp_desc_t ZE_STRUCTURE_TYPE_MUTABLE_KERNEL_ARGUMENT_EXP_DESC = 0x00020019, ///< ::ze_mutable_kernel_argument_exp_desc_t ZE_STRUCTURE_TYPE_MUTABLE_GROUP_COUNT_EXP_DESC = 0x0002001A, ///< ::ze_mutable_group_count_exp_desc_t ZE_STRUCTURE_TYPE_MUTABLE_GROUP_SIZE_EXP_DESC = 0x0002001B, ///< ::ze_mutable_group_size_exp_desc_t ZE_STRUCTURE_TYPE_MUTABLE_GLOBAL_OFFSET_EXP_DESC = 0x0002001C, ///< ::ze_mutable_global_offset_exp_desc_t ZE_STRUCTURE_TYPE_PITCHED_ALLOC_DEVICE_EXP_PROPERTIES = 0x0002001D, ///< ::ze_device_pitched_alloc_exp_properties_t ZE_STRUCTURE_TYPE_BINDLESS_IMAGE_EXP_DESC = 0x0002001E, ///< ::ze_image_bindless_exp_desc_t ZE_STRUCTURE_TYPE_PITCHED_IMAGE_EXP_DESC = 0x0002001F, ///< ::ze_image_pitched_exp_desc_t ZE_STRUCTURE_TYPE_MUTABLE_GRAPH_ARGUMENT_EXP_DESC = 0x00020020, ///< ::ze_mutable_graph_argument_exp_desc_t ZE_STRUCTURE_TYPE_INIT_DRIVER_TYPE_DESC = 0x00020021, ///< 
::ze_init_driver_type_desc_t ZE_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_EXT_DESC = 0x00020022, ///< ::ze_external_semaphore_ext_desc_t ZE_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_WIN32_EXT_DESC = 0x00020023, ///< ::ze_external_semaphore_win32_ext_desc_t ZE_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_FD_EXT_DESC = 0x00020024, ///< ::ze_external_semaphore_fd_ext_desc_t ZE_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_EXT = 0x00020025, ///< ::ze_external_semaphore_signal_params_ext_t ZE_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_WAIT_PARAMS_EXT = 0x00020026, ///< ::ze_external_semaphore_wait_params_ext_t ZE_STRUCTURE_TYPE_DRIVER_DDI_HANDLES_EXT_PROPERTIES = 0x00020027, ///< ::ze_driver_ddi_handles_ext_properties_t ZE_STRUCTURE_TYPE_DEVICE_CACHELINE_SIZE_EXT = 0x00020028, ///< ::ze_device_cache_line_size_ext_t ZE_STRUCTURE_TYPE_DEVICE_VECTOR_WIDTH_PROPERTIES_EXT = 0x00020029, ///< ::ze_device_vector_width_properties_ext_t ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXT_DESC = 0x00020030, ///< ::ze_rtas_builder_ext_desc_t ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_EXT_DESC = 0x00020031, ///< ::ze_rtas_builder_build_op_ext_desc_t ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXT_PROPERTIES = 0x00020032, ///< ::ze_rtas_builder_ext_properties_t ZE_STRUCTURE_TYPE_RTAS_PARALLEL_OPERATION_EXT_PROPERTIES = 0x00020033, ///< ::ze_rtas_parallel_operation_ext_properties_t ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXT_PROPERTIES = 0x00020034, ///< ::ze_rtas_device_ext_properties_t ZE_STRUCTURE_TYPE_RTAS_GEOMETRY_AABBS_EXT_CB_PARAMS = 0x00020035, ///< ::ze_rtas_geometry_aabbs_ext_cb_params_t ZE_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_STRUCTURE_TYPE_* ENUMs } ze_structure_type_t; /////////////////////////////////////////////////////////////////////////////// /// @brief External memory type flags typedef uint32_t ze_external_memory_type_flags_t; typedef enum _ze_external_memory_type_flag_t { ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_FD = ZE_BIT(0), ///< an opaque POSIX file descriptor handle ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF = 
ZE_BIT(1), ///< a file descriptor handle for a Linux dma_buf ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32 = ZE_BIT(2), ///< an NT handle ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32_KMT = ZE_BIT(3), ///< a global share (KMT) handle ZE_EXTERNAL_MEMORY_TYPE_FLAG_D3D11_TEXTURE = ZE_BIT(4), ///< an NT handle referring to a Direct3D 10 or 11 texture resource ZE_EXTERNAL_MEMORY_TYPE_FLAG_D3D11_TEXTURE_KMT = ZE_BIT(5), ///< a global share (KMT) handle referring to a Direct3D 10 or 11 texture ///< resource ZE_EXTERNAL_MEMORY_TYPE_FLAG_D3D12_HEAP = ZE_BIT(6), ///< an NT handle referring to a Direct3D 12 heap resource ZE_EXTERNAL_MEMORY_TYPE_FLAG_D3D12_RESOURCE = ZE_BIT(7), ///< an NT handle referring to a Direct3D 12 committed resource ZE_EXTERNAL_MEMORY_TYPE_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_EXTERNAL_MEMORY_TYPE_FLAG_* ENUMs } ze_external_memory_type_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Bandwidth unit typedef enum _ze_bandwidth_unit_t { ZE_BANDWIDTH_UNIT_UNKNOWN = 0, ///< The unit used for bandwidth is unknown ZE_BANDWIDTH_UNIT_BYTES_PER_NANOSEC = 1, ///< Bandwidth is provided in bytes/nanosec ZE_BANDWIDTH_UNIT_BYTES_PER_CLOCK = 2, ///< Bandwidth is provided in bytes/clock ZE_BANDWIDTH_UNIT_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_BANDWIDTH_UNIT_* ENUMs } ze_bandwidth_unit_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Latency unit typedef enum _ze_latency_unit_t { ZE_LATENCY_UNIT_UNKNOWN = 0, ///< The unit used for latency is unknown ZE_LATENCY_UNIT_NANOSEC = 1, ///< Latency is provided in nanosecs ZE_LATENCY_UNIT_CLOCK = 2, ///< Latency is provided in clocks ZE_LATENCY_UNIT_HOP = 3, ///< Latency is provided in hops (normalized so that the lowest latency ///< link has a latency of 1 hop) ZE_LATENCY_UNIT_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_LATENCY_UNIT_* ENUMs } ze_latency_unit_t; 
/////////////////////////////////////////////////////////////////////////////// #ifndef ZE_MAX_UUID_SIZE /// @brief Maximum universal unique id (UUID) size in bytes #define ZE_MAX_UUID_SIZE 16 #endif // ZE_MAX_UUID_SIZE /////////////////////////////////////////////////////////////////////////////// /// @brief Universal unique id (UUID) typedef struct _ze_uuid_t { uint8_t id[ZE_MAX_UUID_SIZE]; ///< [out] opaque data representing a UUID } ze_uuid_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Base for all callback function parameter types typedef struct _ze_base_cb_params_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). } ze_base_cb_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Base for all properties types typedef struct _ze_base_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). } ze_base_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Base for all descriptor types typedef struct _ze_base_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). 
} ze_base_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forces driver to only report devices (and sub-devices) as specified by /// values /////////////////////////////////////////////////////////////////////////////// /// @brief Forces driver to report devices from lowest to highest PCI bus ID /////////////////////////////////////////////////////////////////////////////// /// @brief Forces all shared allocations into device memory /////////////////////////////////////////////////////////////////////////////// /// @brief Defines the device hierarchy model exposed by Level Zero driver /// implementation /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_ipc_mem_handle_t typedef struct _ze_ipc_mem_handle_t ze_ipc_mem_handle_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_ipc_event_pool_handle_t typedef struct _ze_ipc_event_pool_handle_t ze_ipc_event_pool_handle_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_uuid_t typedef struct _ze_uuid_t ze_uuid_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_base_cb_params_t typedef struct _ze_base_cb_params_t ze_base_cb_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_base_properties_t typedef struct _ze_base_properties_t ze_base_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_base_desc_t typedef struct _ze_base_desc_t ze_base_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_init_driver_type_desc_t typedef struct _ze_init_driver_type_desc_t ze_init_driver_type_desc_t; 
/////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_driver_uuid_t typedef struct _ze_driver_uuid_t ze_driver_uuid_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_driver_properties_t typedef struct _ze_driver_properties_t ze_driver_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_driver_ipc_properties_t typedef struct _ze_driver_ipc_properties_t ze_driver_ipc_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_driver_extension_properties_t typedef struct _ze_driver_extension_properties_t ze_driver_extension_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_device_uuid_t typedef struct _ze_device_uuid_t ze_device_uuid_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_device_properties_t typedef struct _ze_device_properties_t ze_device_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_device_thread_t typedef struct _ze_device_thread_t ze_device_thread_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_device_compute_properties_t typedef struct _ze_device_compute_properties_t ze_device_compute_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_native_kernel_uuid_t typedef struct _ze_native_kernel_uuid_t ze_native_kernel_uuid_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_device_module_properties_t typedef struct _ze_device_module_properties_t ze_device_module_properties_t; 
/////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_command_queue_group_properties_t typedef struct _ze_command_queue_group_properties_t ze_command_queue_group_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_device_memory_properties_t typedef struct _ze_device_memory_properties_t ze_device_memory_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_device_memory_access_properties_t typedef struct _ze_device_memory_access_properties_t ze_device_memory_access_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_device_cache_properties_t typedef struct _ze_device_cache_properties_t ze_device_cache_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_device_image_properties_t typedef struct _ze_device_image_properties_t ze_device_image_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_device_external_memory_properties_t typedef struct _ze_device_external_memory_properties_t ze_device_external_memory_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_device_p2p_properties_t typedef struct _ze_device_p2p_properties_t ze_device_p2p_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_context_desc_t typedef struct _ze_context_desc_t ze_context_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_command_queue_desc_t typedef struct _ze_command_queue_desc_t ze_command_queue_desc_t; 
/////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_command_list_desc_t typedef struct _ze_command_list_desc_t ze_command_list_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_copy_region_t typedef struct _ze_copy_region_t ze_copy_region_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_image_region_t typedef struct _ze_image_region_t ze_image_region_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_event_pool_desc_t typedef struct _ze_event_pool_desc_t ze_event_pool_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_event_desc_t typedef struct _ze_event_desc_t ze_event_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_kernel_timestamp_data_t typedef struct _ze_kernel_timestamp_data_t ze_kernel_timestamp_data_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_kernel_timestamp_result_t typedef struct _ze_kernel_timestamp_result_t ze_kernel_timestamp_result_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_fence_desc_t typedef struct _ze_fence_desc_t ze_fence_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_image_format_t typedef struct _ze_image_format_t ze_image_format_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_image_desc_t typedef struct _ze_image_desc_t ze_image_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_image_properties_t typedef struct 
_ze_image_properties_t ze_image_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_device_mem_alloc_desc_t typedef struct _ze_device_mem_alloc_desc_t ze_device_mem_alloc_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_host_mem_alloc_desc_t typedef struct _ze_host_mem_alloc_desc_t ze_host_mem_alloc_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_memory_allocation_properties_t typedef struct _ze_memory_allocation_properties_t ze_memory_allocation_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_external_memory_export_desc_t typedef struct _ze_external_memory_export_desc_t ze_external_memory_export_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_external_memory_import_fd_t typedef struct _ze_external_memory_import_fd_t ze_external_memory_import_fd_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_external_memory_export_fd_t typedef struct _ze_external_memory_export_fd_t ze_external_memory_export_fd_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_external_memory_import_win32_handle_t typedef struct _ze_external_memory_import_win32_handle_t ze_external_memory_import_win32_handle_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_external_memory_export_win32_handle_t typedef struct _ze_external_memory_export_win32_handle_t ze_external_memory_export_win32_handle_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_module_constants_t typedef struct _ze_module_constants_t 
ze_module_constants_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_module_desc_t typedef struct _ze_module_desc_t ze_module_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_module_properties_t typedef struct _ze_module_properties_t ze_module_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_kernel_desc_t typedef struct _ze_kernel_desc_t ze_kernel_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_kernel_uuid_t typedef struct _ze_kernel_uuid_t ze_kernel_uuid_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_kernel_properties_t typedef struct _ze_kernel_properties_t ze_kernel_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_kernel_preferred_group_size_properties_t typedef struct _ze_kernel_preferred_group_size_properties_t ze_kernel_preferred_group_size_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_group_count_t typedef struct _ze_group_count_t ze_group_count_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_module_program_exp_desc_t typedef struct _ze_module_program_exp_desc_t ze_module_program_exp_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_device_raytracing_ext_properties_t typedef struct _ze_device_raytracing_ext_properties_t ze_device_raytracing_ext_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_raytracing_mem_alloc_ext_desc_t typedef struct 
_ze_raytracing_mem_alloc_ext_desc_t ze_raytracing_mem_alloc_ext_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_sampler_desc_t typedef struct _ze_sampler_desc_t ze_sampler_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_physical_mem_desc_t typedef struct _ze_physical_mem_desc_t ze_physical_mem_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_float_atomic_ext_properties_t typedef struct _ze_float_atomic_ext_properties_t ze_float_atomic_ext_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_relaxed_allocation_limits_exp_desc_t typedef struct _ze_relaxed_allocation_limits_exp_desc_t ze_relaxed_allocation_limits_exp_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_driver_ddi_handles_ext_properties_t typedef struct _ze_driver_ddi_handles_ext_properties_t ze_driver_ddi_handles_ext_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_external_semaphore_ext_desc_t typedef struct _ze_external_semaphore_ext_desc_t ze_external_semaphore_ext_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_external_semaphore_win32_ext_desc_t typedef struct _ze_external_semaphore_win32_ext_desc_t ze_external_semaphore_win32_ext_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_external_semaphore_fd_ext_desc_t typedef struct _ze_external_semaphore_fd_ext_desc_t ze_external_semaphore_fd_ext_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_external_semaphore_signal_params_ext_t 
typedef struct _ze_external_semaphore_signal_params_ext_t ze_external_semaphore_signal_params_ext_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_external_semaphore_wait_params_ext_t typedef struct _ze_external_semaphore_wait_params_ext_t ze_external_semaphore_wait_params_ext_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_device_cache_line_size_ext_t typedef struct _ze_device_cache_line_size_ext_t ze_device_cache_line_size_ext_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_builder_ext_desc_t typedef struct _ze_rtas_builder_ext_desc_t ze_rtas_builder_ext_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_builder_ext_properties_t typedef struct _ze_rtas_builder_ext_properties_t ze_rtas_builder_ext_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_parallel_operation_ext_properties_t typedef struct _ze_rtas_parallel_operation_ext_properties_t ze_rtas_parallel_operation_ext_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_device_ext_properties_t typedef struct _ze_rtas_device_ext_properties_t ze_rtas_device_ext_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_float3_ext_t typedef struct _ze_rtas_float3_ext_t ze_rtas_float3_ext_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_transform_float3x4_column_major_ext_t typedef struct _ze_rtas_transform_float3x4_column_major_ext_t ze_rtas_transform_float3x4_column_major_ext_t; /////////////////////////////////////////////////////////////////////////////// 
/// @brief Forward-declare ze_rtas_transform_float3x4_aligned_column_major_ext_t typedef struct _ze_rtas_transform_float3x4_aligned_column_major_ext_t ze_rtas_transform_float3x4_aligned_column_major_ext_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_transform_float3x4_row_major_ext_t typedef struct _ze_rtas_transform_float3x4_row_major_ext_t ze_rtas_transform_float3x4_row_major_ext_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_aabb_ext_t typedef struct _ze_rtas_aabb_ext_t ze_rtas_aabb_ext_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_triangle_indices_uint32_ext_t typedef struct _ze_rtas_triangle_indices_uint32_ext_t ze_rtas_triangle_indices_uint32_ext_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_quad_indices_uint32_ext_t typedef struct _ze_rtas_quad_indices_uint32_ext_t ze_rtas_quad_indices_uint32_ext_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_builder_geometry_info_ext_t typedef struct _ze_rtas_builder_geometry_info_ext_t ze_rtas_builder_geometry_info_ext_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_builder_triangles_geometry_info_ext_t typedef struct _ze_rtas_builder_triangles_geometry_info_ext_t ze_rtas_builder_triangles_geometry_info_ext_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_builder_quads_geometry_info_ext_t typedef struct _ze_rtas_builder_quads_geometry_info_ext_t ze_rtas_builder_quads_geometry_info_ext_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_geometry_aabbs_ext_cb_params_t 
typedef struct _ze_rtas_geometry_aabbs_ext_cb_params_t ze_rtas_geometry_aabbs_ext_cb_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_builder_procedural_geometry_info_ext_t typedef struct _ze_rtas_builder_procedural_geometry_info_ext_t ze_rtas_builder_procedural_geometry_info_ext_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_builder_instance_geometry_info_ext_t typedef struct _ze_rtas_builder_instance_geometry_info_ext_t ze_rtas_builder_instance_geometry_info_ext_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_builder_build_op_ext_desc_t typedef struct _ze_rtas_builder_build_op_ext_desc_t ze_rtas_builder_build_op_ext_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_device_vector_width_properties_ext_t typedef struct _ze_device_vector_width_properties_ext_t ze_device_vector_width_properties_ext_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_cache_reservation_ext_desc_t typedef struct _ze_cache_reservation_ext_desc_t ze_cache_reservation_ext_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_image_memory_properties_exp_t typedef struct _ze_image_memory_properties_exp_t ze_image_memory_properties_exp_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_image_view_planar_ext_desc_t typedef struct _ze_image_view_planar_ext_desc_t ze_image_view_planar_ext_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_image_view_planar_exp_desc_t typedef struct _ze_image_view_planar_exp_desc_t ze_image_view_planar_exp_desc_t; 
/////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_scheduling_hint_exp_properties_t typedef struct _ze_scheduling_hint_exp_properties_t ze_scheduling_hint_exp_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_scheduling_hint_exp_desc_t typedef struct _ze_scheduling_hint_exp_desc_t ze_scheduling_hint_exp_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_context_power_saving_hint_exp_desc_t typedef struct _ze_context_power_saving_hint_exp_desc_t ze_context_power_saving_hint_exp_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_eu_count_ext_t typedef struct _ze_eu_count_ext_t ze_eu_count_ext_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_pci_address_ext_t typedef struct _ze_pci_address_ext_t ze_pci_address_ext_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_pci_speed_ext_t typedef struct _ze_pci_speed_ext_t ze_pci_speed_ext_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_pci_ext_properties_t typedef struct _ze_pci_ext_properties_t ze_pci_ext_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_srgb_ext_desc_t typedef struct _ze_srgb_ext_desc_t ze_srgb_ext_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_image_allocation_ext_properties_t typedef struct _ze_image_allocation_ext_properties_t ze_image_allocation_ext_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_linkage_inspection_ext_desc_t typedef 
struct _ze_linkage_inspection_ext_desc_t ze_linkage_inspection_ext_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_memory_compression_hints_ext_desc_t typedef struct _ze_memory_compression_hints_ext_desc_t ze_memory_compression_hints_ext_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_driver_memory_free_ext_properties_t typedef struct _ze_driver_memory_free_ext_properties_t ze_driver_memory_free_ext_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_memory_free_ext_desc_t typedef struct _ze_memory_free_ext_desc_t ze_memory_free_ext_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_device_p2p_bandwidth_exp_properties_t typedef struct _ze_device_p2p_bandwidth_exp_properties_t ze_device_p2p_bandwidth_exp_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_copy_bandwidth_exp_properties_t typedef struct _ze_copy_bandwidth_exp_properties_t ze_copy_bandwidth_exp_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_device_luid_ext_t typedef struct _ze_device_luid_ext_t ze_device_luid_ext_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_device_luid_ext_properties_t typedef struct _ze_device_luid_ext_properties_t ze_device_luid_ext_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_fabric_vertex_pci_exp_address_t typedef struct _ze_fabric_vertex_pci_exp_address_t ze_fabric_vertex_pci_exp_address_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare 
ze_fabric_vertex_exp_properties_t typedef struct _ze_fabric_vertex_exp_properties_t ze_fabric_vertex_exp_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_fabric_edge_exp_properties_t typedef struct _ze_fabric_edge_exp_properties_t ze_fabric_edge_exp_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_device_memory_ext_properties_t typedef struct _ze_device_memory_ext_properties_t ze_device_memory_ext_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_device_ip_version_ext_t typedef struct _ze_device_ip_version_ext_t ze_device_ip_version_ext_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_kernel_max_group_size_properties_ext_t typedef struct _ze_kernel_max_group_size_properties_ext_t ze_kernel_max_group_size_properties_ext_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_sub_allocation_t typedef struct _ze_sub_allocation_t ze_sub_allocation_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_memory_sub_allocations_exp_properties_t typedef struct _ze_memory_sub_allocations_exp_properties_t ze_memory_sub_allocations_exp_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_event_query_kernel_timestamps_ext_properties_t typedef struct _ze_event_query_kernel_timestamps_ext_properties_t ze_event_query_kernel_timestamps_ext_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_synchronized_timestamp_data_ext_t typedef struct _ze_synchronized_timestamp_data_ext_t ze_synchronized_timestamp_data_ext_t; 
/////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_synchronized_timestamp_result_ext_t typedef struct _ze_synchronized_timestamp_result_ext_t ze_synchronized_timestamp_result_ext_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_event_query_kernel_timestamps_results_ext_properties_t typedef struct _ze_event_query_kernel_timestamps_results_ext_properties_t ze_event_query_kernel_timestamps_results_ext_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_builder_exp_desc_t typedef struct _ze_rtas_builder_exp_desc_t ze_rtas_builder_exp_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_builder_exp_properties_t typedef struct _ze_rtas_builder_exp_properties_t ze_rtas_builder_exp_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_parallel_operation_exp_properties_t typedef struct _ze_rtas_parallel_operation_exp_properties_t ze_rtas_parallel_operation_exp_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_device_exp_properties_t typedef struct _ze_rtas_device_exp_properties_t ze_rtas_device_exp_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_float3_exp_t typedef struct _ze_rtas_float3_exp_t ze_rtas_float3_exp_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_transform_float3x4_column_major_exp_t typedef struct _ze_rtas_transform_float3x4_column_major_exp_t ze_rtas_transform_float3x4_column_major_exp_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare 
ze_rtas_transform_float3x4_aligned_column_major_exp_t typedef struct _ze_rtas_transform_float3x4_aligned_column_major_exp_t ze_rtas_transform_float3x4_aligned_column_major_exp_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_transform_float3x4_row_major_exp_t typedef struct _ze_rtas_transform_float3x4_row_major_exp_t ze_rtas_transform_float3x4_row_major_exp_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_aabb_exp_t typedef struct _ze_rtas_aabb_exp_t ze_rtas_aabb_exp_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_triangle_indices_uint32_exp_t typedef struct _ze_rtas_triangle_indices_uint32_exp_t ze_rtas_triangle_indices_uint32_exp_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_quad_indices_uint32_exp_t typedef struct _ze_rtas_quad_indices_uint32_exp_t ze_rtas_quad_indices_uint32_exp_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_builder_geometry_info_exp_t typedef struct _ze_rtas_builder_geometry_info_exp_t ze_rtas_builder_geometry_info_exp_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_builder_triangles_geometry_info_exp_t typedef struct _ze_rtas_builder_triangles_geometry_info_exp_t ze_rtas_builder_triangles_geometry_info_exp_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_builder_quads_geometry_info_exp_t typedef struct _ze_rtas_builder_quads_geometry_info_exp_t ze_rtas_builder_quads_geometry_info_exp_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_geometry_aabbs_exp_cb_params_t typedef struct 
_ze_rtas_geometry_aabbs_exp_cb_params_t ze_rtas_geometry_aabbs_exp_cb_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_builder_procedural_geometry_info_exp_t typedef struct _ze_rtas_builder_procedural_geometry_info_exp_t ze_rtas_builder_procedural_geometry_info_exp_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_builder_instance_geometry_info_exp_t typedef struct _ze_rtas_builder_instance_geometry_info_exp_t ze_rtas_builder_instance_geometry_info_exp_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_rtas_builder_build_op_exp_desc_t typedef struct _ze_rtas_builder_build_op_exp_desc_t ze_rtas_builder_build_op_exp_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_event_pool_counter_based_exp_desc_t typedef struct _ze_event_pool_counter_based_exp_desc_t ze_event_pool_counter_based_exp_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_image_bindless_exp_desc_t typedef struct _ze_image_bindless_exp_desc_t ze_image_bindless_exp_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_image_pitched_exp_desc_t typedef struct _ze_image_pitched_exp_desc_t ze_image_pitched_exp_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_device_pitched_alloc_exp_properties_t typedef struct _ze_device_pitched_alloc_exp_properties_t ze_device_pitched_alloc_exp_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_mutable_command_id_exp_desc_t typedef struct _ze_mutable_command_id_exp_desc_t ze_mutable_command_id_exp_desc_t; 
/////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_mutable_command_list_exp_properties_t typedef struct _ze_mutable_command_list_exp_properties_t ze_mutable_command_list_exp_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_mutable_command_list_exp_desc_t typedef struct _ze_mutable_command_list_exp_desc_t ze_mutable_command_list_exp_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_mutable_commands_exp_desc_t typedef struct _ze_mutable_commands_exp_desc_t ze_mutable_commands_exp_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_mutable_kernel_argument_exp_desc_t typedef struct _ze_mutable_kernel_argument_exp_desc_t ze_mutable_kernel_argument_exp_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_mutable_group_count_exp_desc_t typedef struct _ze_mutable_group_count_exp_desc_t ze_mutable_group_count_exp_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_mutable_group_size_exp_desc_t typedef struct _ze_mutable_group_size_exp_desc_t ze_mutable_group_size_exp_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_mutable_global_offset_exp_desc_t typedef struct _ze_mutable_global_offset_exp_desc_t ze_mutable_global_offset_exp_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare ze_mutable_graph_argument_exp_desc_t typedef struct _ze_mutable_graph_argument_exp_desc_t ze_mutable_graph_argument_exp_desc_t; #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero APIs #if !defined(__GNUC__) #pragma region driver #endif 
/////////////////////////////////////////////////////////////////////////////// /// @brief Supported initialization flags typedef uint32_t ze_init_flags_t; typedef enum _ze_init_flag_t { ZE_INIT_FLAG_GPU_ONLY = ZE_BIT(0), ///< only initialize GPU drivers ZE_INIT_FLAG_VPU_ONLY = ZE_BIT(1), ///< only initialize VPU drivers ZE_INIT_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_INIT_FLAG_* ENUMs } ze_init_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Initialize the 'oneAPI' driver(s) /// /// @details /// - @deprecated since 1.10. Please use zeInitDrivers() /// - The application must call this function or zeInitDrivers before /// calling any other function. /// - If this function is not called then all other functions will return /// ::ZE_RESULT_ERROR_UNINITIALIZED. /// - Only one instance of each driver will be initialized per process. /// - The application may call this function multiple times with different /// flags or environment variables enabled. /// - The application must call this function after forking new processes. /// Each forked process must call this function. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe for scenarios /// where multiple libraries may initialize the driver(s) simultaneously. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION /// + `0x3 < flags` ZE_APIEXPORT ze_result_t ZE_APICALL zeInit( ze_init_flags_t flags ///< [in] initialization flags. ///< must be 0 (default) or a combination of ::ze_init_flag_t. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves driver instances /// /// @details /// - @deprecated since 1.10. 
Please use zeInitDrivers() /// - Usage of zeInitDrivers and zeDriverGet is mutually exclusive and /// should not be used together. Usage of them together will result in /// undefined behavior. /// - A driver represents a collection of physical devices. /// - Multiple calls to this function will return identical driver handles, /// in the same order. /// - The application may pass nullptr for pDrivers when only querying the /// number of drivers. /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @remarks /// _Analogues_ /// - clGetPlatformIDs /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pCount` ZE_APIEXPORT ze_result_t ZE_APICALL zeDriverGet( uint32_t* pCount, ///< [in,out] pointer to the number of driver instances. ///< if count is zero, then the loader shall update the value with the ///< total number of drivers available. ///< if count is greater than the number of drivers available, then the ///< loader shall update the value with the correct number of drivers available. ze_driver_handle_t* phDrivers ///< [in,out][optional][range(0, *pCount)] array of driver instance handles. ///< if count is less than the number of drivers available, then the loader ///< shall only retrieve that number of drivers. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Supported driver initialization type flags /// /// @details /// - Bit Field which details the driver types to be initialized and /// returned to the user. /// - Value Definition: /// - 0, do not init or retrieve any drivers. /// - ZE_INIT_DRIVER_TYPE_FLAG_GPU, GPU Drivers are Init and driver handles /// retrieved. 
/// - ZE_INIT_DRIVER_TYPE_FLAG_NPU, NPU Drivers are Init and driver handles /// retrieved. /// - ZE_INIT_DRIVER_TYPE_FLAG_GPU | ZE_INIT_DRIVER_TYPE_FLAG_NPU, NPU & GPU /// Drivers are Init and driver handles retrieved. /// - UINT32_MAX All Drivers of any type are Init and driver handles /// retrieved. typedef uint32_t ze_init_driver_type_flags_t; typedef enum _ze_init_driver_type_flag_t { ZE_INIT_DRIVER_TYPE_FLAG_GPU = ZE_BIT(0), ///< initialize and retrieve GPU drivers ZE_INIT_DRIVER_TYPE_FLAG_NPU = ZE_BIT(1), ///< initialize and retrieve NPU drivers ZE_INIT_DRIVER_TYPE_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_INIT_DRIVER_TYPE_FLAG_* ENUMs } ze_init_driver_type_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Init Driver Type descriptor typedef struct _ze_init_driver_type_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_init_driver_type_flags_t flags; ///< [in] driver type init flags. ///< must be a valid combination of ::ze_init_driver_type_flag_t or UINT32_MAX; ///< driver types are init and retrieved based on these init flags in zeInitDrivers(). } ze_init_driver_type_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Initialize the 'oneAPI' driver(s) based on the driver types requested /// and retrieve the driver handles. /// /// @details /// - The application must call this function or zeInit before calling any /// other function. (zeInit is [Deprecated] and is replaced by /// zeInitDrivers) /// - Calls to zeInit[Deprecated] or InitDrivers will not alter the drivers /// retrieved through either api. /// - Drivers init through zeInit[Deprecated] or InitDrivers will not be /// reInitialized once init in an application. 
The Loader will determine /// if the already init driver needs to be delivered to the user through /// the init type flags. /// - Already init Drivers will not be uninitialized if the call to /// InitDrivers does not include that driver's type. Those init drivers /// which don't match the init flags will not have their driver handles /// returned to the user in that InitDrivers call. /// - If this function or zeInit[Deprecated] is not called, then all other /// functions will return ::ZE_RESULT_ERROR_UNINITIALIZED. /// - Only one instance of each driver will be initialized per process. /// - A driver represents a collection of physical devices. /// - Multiple calls to this function will return identical driver handles, /// in the same order. /// - The drivers returned to the caller will be based on the init types /// which state the drivers to be included. /// - The application may pass nullptr for pDrivers when only querying the /// number of drivers. /// - The application may call this function multiple times with different /// flags or environment variables enabled. /// - The application must call this function after forking new processes. /// Each forked process must call this function. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe for scenarios /// where multiple libraries may initialize the driver(s) simultaneously. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pCount` /// + `nullptr == desc` /// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION /// + `0x0 == desc->flags` ZE_APIEXPORT ze_result_t ZE_APICALL zeInitDrivers( uint32_t* pCount, ///< [in,out] pointer to the number of driver instances. 
///< if count is zero, then the loader shall update the value with the ///< total number of drivers available. ///< if count is greater than the number of drivers available, then the ///< loader shall update the value with the correct number of drivers available. ze_driver_handle_t* phDrivers, ///< [in,out][optional][range(0, *pCount)] array of driver instance handles. ///< if count is less than the number of drivers available, then the loader ///< shall only retrieve that number of drivers. ze_init_driver_type_desc_t* desc ///< [in] descriptor containing the driver type initialization details ///< including ::ze_init_driver_type_flag_t combinations. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Supported API versions /// /// @details /// - API versions contain major and minor attributes, use /// ::ZE_MAJOR_VERSION and ::ZE_MINOR_VERSION typedef enum _ze_api_version_t { ZE_API_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_API_VERSION_1_1 = ZE_MAKE_VERSION( 1, 1 ), ///< version 1.1 ZE_API_VERSION_1_2 = ZE_MAKE_VERSION( 1, 2 ), ///< version 1.2 ZE_API_VERSION_1_3 = ZE_MAKE_VERSION( 1, 3 ), ///< version 1.3 ZE_API_VERSION_1_4 = ZE_MAKE_VERSION( 1, 4 ), ///< version 1.4 ZE_API_VERSION_1_5 = ZE_MAKE_VERSION( 1, 5 ), ///< version 1.5 ZE_API_VERSION_1_6 = ZE_MAKE_VERSION( 1, 6 ), ///< version 1.6 ZE_API_VERSION_1_7 = ZE_MAKE_VERSION( 1, 7 ), ///< version 1.7 ZE_API_VERSION_1_8 = ZE_MAKE_VERSION( 1, 8 ), ///< version 1.8 ZE_API_VERSION_1_9 = ZE_MAKE_VERSION( 1, 9 ), ///< version 1.9 ZE_API_VERSION_1_10 = ZE_MAKE_VERSION( 1, 10 ), ///< version 1.10 ZE_API_VERSION_1_11 = ZE_MAKE_VERSION( 1, 11 ), ///< version 1.11 ZE_API_VERSION_1_12 = ZE_MAKE_VERSION( 1, 12 ), ///< version 1.12 ZE_API_VERSION_1_13 = ZE_MAKE_VERSION( 1, 13 ), ///< version 1.13 ZE_API_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 13 ), ///< latest known version ZE_API_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_API_VERSION_* ENUMs } 
ze_api_version_t; /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_API_VERSION_CURRENT_M /// @brief Current API version as a macro #define ZE_API_VERSION_CURRENT_M ZE_MAKE_VERSION( 1, 13 ) #endif // ZE_API_VERSION_CURRENT_M /////////////////////////////////////////////////////////////////////////////// /// @brief Returns the API version supported by the specified driver /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDriver` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == version` ZE_APIEXPORT ze_result_t ZE_APICALL zeDriverGetApiVersion( ze_driver_handle_t hDriver, ///< [in] handle of the driver instance ze_api_version_t* version ///< [out] api version ); /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_MAX_DRIVER_UUID_SIZE /// @brief Maximum driver universal unique id (UUID) size in bytes #define ZE_MAX_DRIVER_UUID_SIZE 16 #endif // ZE_MAX_DRIVER_UUID_SIZE /////////////////////////////////////////////////////////////////////////////// /// @brief Driver universal unique id (UUID) typedef struct _ze_driver_uuid_t { uint8_t id[ZE_MAX_DRIVER_UUID_SIZE]; ///< [out] opaque data representing a driver UUID } ze_driver_uuid_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Driver properties queried using ::zeDriverGetProperties typedef struct _ze_driver_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). 
ze_driver_uuid_t uuid; ///< [out] universal unique identifier. uint32_t driverVersion; ///< [out] driver version ///< The driver version is a non-zero, monotonically increasing value where ///< higher values always indicate a more recent version. } ze_driver_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves properties of the driver. /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @remarks /// _Analogues_ /// - **clGetPlatformInfo** /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDriver` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pDriverProperties` ZE_APIEXPORT ze_result_t ZE_APICALL zeDriverGetProperties( ze_driver_handle_t hDriver, ///< [in] handle of the driver instance ze_driver_properties_t* pDriverProperties ///< [in,out] query result for driver properties ); /////////////////////////////////////////////////////////////////////////////// /// @brief Supported IPC property flags typedef uint32_t ze_ipc_property_flags_t; typedef enum _ze_ipc_property_flag_t { ZE_IPC_PROPERTY_FLAG_MEMORY = ZE_BIT(0), ///< Supports passing memory allocations between processes. See ///< ::zeMemGetIpcHandle. ZE_IPC_PROPERTY_FLAG_EVENT_POOL = ZE_BIT(1), ///< Supports passing event pools between processes. See ///< ::zeEventPoolGetIpcHandle. 
ZE_IPC_PROPERTY_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_IPC_PROPERTY_FLAG_* ENUMs } ze_ipc_property_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief IPC properties queried using ::zeDriverGetIpcProperties typedef struct _ze_driver_ipc_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_ipc_property_flags_t flags; ///< [out] 0 (none) or a valid combination of ::ze_ipc_property_flag_t } ze_driver_ipc_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves IPC attributes of the driver /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDriver` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pIpcProperties` ZE_APIEXPORT ze_result_t ZE_APICALL zeDriverGetIpcProperties( ze_driver_handle_t hDriver, ///< [in] handle of the driver instance ze_driver_ipc_properties_t* pIpcProperties ///< [in,out] query result for IPC properties ); /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_MAX_EXTENSION_NAME /// @brief Maximum extension name string size #define ZE_MAX_EXTENSION_NAME 256 #endif // ZE_MAX_EXTENSION_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Extension properties queried using ::zeDriverGetExtensionProperties typedef struct _ze_driver_extension_properties_t { char name[ZE_MAX_EXTENSION_NAME]; ///< [out] extension name 
uint32_t version; ///< [out] extension version using ::ZE_MAKE_VERSION } ze_driver_extension_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves extension properties /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @remarks /// _Analogues_ /// - **vkEnumerateInstanceExtensionProperties** /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDriver` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pCount` ZE_APIEXPORT ze_result_t ZE_APICALL zeDriverGetExtensionProperties( ze_driver_handle_t hDriver, ///< [in] handle of the driver instance uint32_t* pCount, ///< [in,out] pointer to the number of extension properties. ///< if count is zero, then the driver shall update the value with the ///< total number of extension properties available. ///< if count is greater than the number of extension properties available, ///< then the driver shall update the value with the correct number of ///< extension properties available. ze_driver_extension_properties_t* pExtensionProperties ///< [in,out][optional][range(0, *pCount)] array of query results for ///< extension properties. ///< if count is less than the number of extension properties available, ///< then driver shall only retrieve that number of extension properties. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves function pointer for vendor-specific or experimental /// extensions /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDriver` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == name` /// + `nullptr == ppFunctionAddress` ZE_APIEXPORT ze_result_t ZE_APICALL zeDriverGetExtensionFunctionAddress( ze_driver_handle_t hDriver, ///< [in] handle of the driver instance const char* name, ///< [in] extension function name void** ppFunctionAddress ///< [out] pointer to function pointer ); /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves a string describing the last error code returned by the /// driver in the current thread. /// /// @details /// - String returned is thread local. /// - String is only updated on calls returning an error, i.e., not on calls /// returning ::ZE_RESULT_SUCCESS. /// - String may be empty if driver considers error code is already explicit /// enough to describe cause. /// - Memory pointed to by ppString is owned by the driver. /// - String returned is null-terminated. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDriver` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == ppString` ZE_APIEXPORT ze_result_t ZE_APICALL zeDriverGetLastErrorDescription( ze_driver_handle_t hDriver, ///< [in] handle of the driver instance const char** ppString ///< [in,out] pointer to a null-terminated array of characters describing ///< cause of error. 
); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero APIs for Device #if !defined(__GNUC__) #pragma region device #endif /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves devices within a driver /// /// @details /// - Multiple calls to this function will return identical device handles, /// in the same order. /// - The number and order of handles returned from this function is /// affected by the ::ZE_AFFINITY_MASK and ::ZE_ENABLE_PCI_ID_DEVICE_ORDER /// environment variables. /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDriver` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pCount` ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGet( ze_driver_handle_t hDriver, ///< [in] handle of the driver instance uint32_t* pCount, ///< [in,out] pointer to the number of devices. ///< if count is zero, then the driver shall update the value with the ///< total number of devices available. ///< if count is greater than the number of devices available, then the ///< driver shall update the value with the correct number of devices available. ze_device_handle_t* phDevices ///< [in,out][optional][range(0, *pCount)] array of handle of devices. ///< if count is less than the number of devices available, then driver ///< shall only retrieve that number of devices. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves the root-device of a device handle /// /// @details /// - When the device handle passed does not belong to any root-device, /// nullptr is returned. 
/// - Multiple calls to this function will return the same device handle. /// - The root-device handle returned by this function does not have access /// automatically to the resources /// created with the associated sub-device, unless those resources have /// been created with a context /// explicitly containing both handles. /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == phRootDevice` ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetRootDevice( ze_device_handle_t hDevice, ///< [in] handle of the device object ze_device_handle_t* phRootDevice ///< [in,out] parent root device. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves a sub-device from a device /// /// @details /// - When the device handle passed does not contain any sub-device, a /// pCount of 0 is returned. /// - Multiple calls to this function will return identical device handles, /// in the same order. /// - The number of handles returned from this function is affected by the /// ::ZE_AFFINITY_MASK environment variable. /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. 
/// /// @remarks /// _Analogues_ /// - clCreateSubDevices /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pCount` ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetSubDevices( ze_device_handle_t hDevice, ///< [in] handle of the device object uint32_t* pCount, ///< [in,out] pointer to the number of sub-devices. ///< if count is zero, then the driver shall update the value with the ///< total number of sub-devices available. ///< if count is greater than the number of sub-devices available, then the ///< driver shall update the value with the correct number of sub-devices available. ze_device_handle_t* phSubdevices ///< [in,out][optional][range(0, *pCount)] array of handle of sub-devices. ///< if count is less than the number of sub-devices available, then driver ///< shall only retrieve that number of sub-devices. 
); /////////////////////////////////////////////////////////////////////////////// /// @brief Supported device types typedef enum _ze_device_type_t { ZE_DEVICE_TYPE_GPU = 1, ///< Graphics Processing Unit ZE_DEVICE_TYPE_CPU = 2, ///< Central Processing Unit ZE_DEVICE_TYPE_FPGA = 3, ///< Field Programmable Gate Array ZE_DEVICE_TYPE_MCA = 4, ///< Memory Copy Accelerator ZE_DEVICE_TYPE_VPU = 5, ///< Vision Processing Unit ZE_DEVICE_TYPE_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_DEVICE_TYPE_* ENUMs } ze_device_type_t; /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_MAX_DEVICE_UUID_SIZE /// @brief Maximum device universal unique id (UUID) size in bytes #define ZE_MAX_DEVICE_UUID_SIZE 16 #endif // ZE_MAX_DEVICE_UUID_SIZE /////////////////////////////////////////////////////////////////////////////// /// @brief Device universal unique id (UUID) typedef struct _ze_device_uuid_t { uint8_t id[ZE_MAX_DEVICE_UUID_SIZE]; ///< [out] opaque data representing a device UUID } ze_device_uuid_t; /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_MAX_DEVICE_NAME /// @brief Maximum device name string size #define ZE_MAX_DEVICE_NAME 256 #endif // ZE_MAX_DEVICE_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Supported device property flags typedef uint32_t ze_device_property_flags_t; typedef enum _ze_device_property_flag_t { ZE_DEVICE_PROPERTY_FLAG_INTEGRATED = ZE_BIT(0), ///< Device is integrated with the Host. ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE = ZE_BIT(1), ///< Device handle used for query represents a sub-device. ZE_DEVICE_PROPERTY_FLAG_ECC = ZE_BIT(2), ///< Device supports error correction memory access. ZE_DEVICE_PROPERTY_FLAG_ONDEMANDPAGING = ZE_BIT(3), ///< Device supports on-demand page-faulting. 
ZE_DEVICE_PROPERTY_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_DEVICE_PROPERTY_FLAG_* ENUMs } ze_device_property_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Device properties queried using ::zeDeviceGetProperties typedef struct _ze_device_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_device_type_t type; ///< [out] generic device type uint32_t vendorId; ///< [out] vendor id from PCI configuration uint32_t deviceId; ///< [out] device id from PCI configuration. ///< Note, the device id uses little-endian format. ze_device_property_flags_t flags; ///< [out] 0 (none) or a valid combination of ::ze_device_property_flag_t uint32_t subdeviceId; ///< [out] sub-device id. Only valid if ::ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE ///< is set. uint32_t coreClockRate; ///< [out] Clock rate for device core. uint64_t maxMemAllocSize; ///< [out] Maximum memory allocation size. uint32_t maxHardwareContexts; ///< [out] Maximum number of logical hardware contexts. uint32_t maxCommandQueuePriority; ///< [out] Maximum priority for command queues. Higher value is higher ///< priority. uint32_t numThreadsPerEU; ///< [out] Maximum number of threads per EU. uint32_t physicalEUSimdWidth; ///< [out] The physical EU simd width. uint32_t numEUsPerSubslice; ///< [out] Maximum number of EUs per sub-slice. uint32_t numSubslicesPerSlice; ///< [out] Maximum number of sub-slices per slice. uint32_t numSlices; ///< [out] Maximum number of slices. uint64_t timerResolution; ///< [out] Returns the resolution of device timer used for profiling, ///< timestamps, etc. When stype==::ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES the ///< units are in nanoseconds. 
When ///< stype==::ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES_1_2 units are in ///< cycles/sec uint32_t timestampValidBits; ///< [out] Returns the number of valid bits in the timestamp value. uint32_t kernelTimestampValidBits; ///< [out] Returns the number of valid bits in the kernel timestamp values ze_device_uuid_t uuid; ///< [out] universal unique identifier. Note: Subdevices will have their ///< own uuid. char name[ZE_MAX_DEVICE_NAME]; ///< [out] Device name } ze_device_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Device thread identifier. typedef struct _ze_device_thread_t { uint32_t slice; ///< [in,out] the slice number. ///< Must be `UINT32_MAX` (all) or less than the `numSlices` member of ::ze_device_properties_t. uint32_t subslice; ///< [in,out] the sub-slice number within its slice. ///< Must be `UINT32_MAX` (all) or less than the `numSubslicesPerSlice` ///< member of ::ze_device_properties_t. uint32_t eu; ///< [in,out] the EU number within its sub-slice. ///< Must be `UINT32_MAX` (all) or less than the `numEUsPerSubslice` member ///< of ::ze_device_properties_t. uint32_t thread; ///< [in,out] the thread number within its EU. ///< Must be `UINT32_MAX` (all) or less than the `numThreadsPerEU` member ///< of ::ze_device_properties_t. } ze_device_thread_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves properties of the device. /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. 
/// /// @remarks /// _Analogues_ /// - clGetDeviceInfo /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pDeviceProperties` ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetProperties( ze_device_handle_t hDevice, ///< [in] handle of the device ze_device_properties_t* pDeviceProperties ///< [in,out] query result for device properties ); /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_SUBGROUPSIZE_COUNT /// @brief Maximum number of subgroup sizes supported. #define ZE_SUBGROUPSIZE_COUNT 8 #endif // ZE_SUBGROUPSIZE_COUNT /////////////////////////////////////////////////////////////////////////////// /// @brief Device compute properties queried using ::zeDeviceGetComputeProperties typedef struct _ze_device_compute_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). uint32_t maxTotalGroupSize; ///< [out] Maximum items per compute group. (groupSizeX * groupSizeY * ///< groupSizeZ) <= maxTotalGroupSize uint32_t maxGroupSizeX; ///< [out] Maximum items for X dimension in group uint32_t maxGroupSizeY; ///< [out] Maximum items for Y dimension in group uint32_t maxGroupSizeZ; ///< [out] Maximum items for Z dimension in group uint32_t maxGroupCountX; ///< [out] Maximum groups that can be launched for x dimension uint32_t maxGroupCountY; ///< [out] Maximum groups that can be launched for y dimension uint32_t maxGroupCountZ; ///< [out] Maximum groups that can be launched for z dimension uint32_t maxSharedLocalMemory; ///< [out] Maximum shared local memory per group. 
uint32_t numSubGroupSizes; ///< [out] Number of subgroup sizes supported. This indicates number of ///< entries in subGroupSizes. uint32_t subGroupSizes[ZE_SUBGROUPSIZE_COUNT]; ///< [out] Size group sizes supported. } ze_device_compute_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves compute properties of the device. /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @remarks /// _Analogues_ /// - clGetDeviceInfo /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pComputeProperties` ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetComputeProperties( ze_device_handle_t hDevice, ///< [in] handle of the device ze_device_compute_properties_t* pComputeProperties ///< [in,out] query result for compute properties ); /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_MAX_NATIVE_KERNEL_UUID_SIZE /// @brief Maximum native kernel universal unique id (UUID) size in bytes #define ZE_MAX_NATIVE_KERNEL_UUID_SIZE 16 #endif // ZE_MAX_NATIVE_KERNEL_UUID_SIZE /////////////////////////////////////////////////////////////////////////////// /// @brief Native kernel universal unique id (UUID) typedef struct _ze_native_kernel_uuid_t { uint8_t id[ZE_MAX_NATIVE_KERNEL_UUID_SIZE]; ///< [out] opaque data representing a native kernel UUID } ze_native_kernel_uuid_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Supported device module flags typedef uint32_t ze_device_module_flags_t; typedef enum _ze_device_module_flag_t { ZE_DEVICE_MODULE_FLAG_FP16 = 
ZE_BIT(0), ///< Device supports 16-bit floating-point operations ZE_DEVICE_MODULE_FLAG_FP64 = ZE_BIT(1), ///< Device supports 64-bit floating-point operations ZE_DEVICE_MODULE_FLAG_INT64_ATOMICS = ZE_BIT(2), ///< Device supports 64-bit atomic operations ZE_DEVICE_MODULE_FLAG_DP4A = ZE_BIT(3), ///< Device supports four component dot product and accumulate operations ZE_DEVICE_MODULE_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_DEVICE_MODULE_FLAG_* ENUMs } ze_device_module_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Supported floating-Point capability flags typedef uint32_t ze_device_fp_flags_t; typedef enum _ze_device_fp_flag_t { ZE_DEVICE_FP_FLAG_DENORM = ZE_BIT(0), ///< Supports denorms ZE_DEVICE_FP_FLAG_INF_NAN = ZE_BIT(1), ///< Supports INF and quiet NaNs ZE_DEVICE_FP_FLAG_ROUND_TO_NEAREST = ZE_BIT(2), ///< Supports rounding to nearest even rounding mode ZE_DEVICE_FP_FLAG_ROUND_TO_ZERO = ZE_BIT(3), ///< Supports rounding to zero. ZE_DEVICE_FP_FLAG_ROUND_TO_INF = ZE_BIT(4), ///< Supports rounding to both positive and negative INF. ZE_DEVICE_FP_FLAG_FMA = ZE_BIT(5), ///< Supports IEEE754-2008 fused multiply-add. ZE_DEVICE_FP_FLAG_ROUNDED_DIVIDE_SQRT = ZE_BIT(6), ///< Supports rounding as defined by IEEE754 for divide and sqrt ///< operations. ZE_DEVICE_FP_FLAG_SOFT_FLOAT = ZE_BIT(7), ///< Uses software implementation for basic floating-point operations. ZE_DEVICE_FP_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_DEVICE_FP_FLAG_* ENUMs } ze_device_fp_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Device module properties queried using ::zeDeviceGetModuleProperties typedef struct _ze_device_module_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). 
uint32_t spirvVersionSupported; ///< [out] Maximum supported SPIR-V version. ///< Returns zero if SPIR-V is not supported. ///< Contains major and minor attributes, use ::ZE_MAJOR_VERSION and ::ZE_MINOR_VERSION. ze_device_module_flags_t flags; ///< [out] 0 or a valid combination of ::ze_device_module_flag_t ze_device_fp_flags_t fp16flags; ///< [out] Capabilities for half-precision floating-point operations. ///< returns 0 (if ::ZE_DEVICE_MODULE_FLAG_FP16 is not set) or a ///< combination of ::ze_device_fp_flag_t. ze_device_fp_flags_t fp32flags; ///< [out] Capabilities for single-precision floating-point operations. ///< returns a combination of ::ze_device_fp_flag_t. ze_device_fp_flags_t fp64flags; ///< [out] Capabilities for double-precision floating-point operations. ///< returns 0 (if ::ZE_DEVICE_MODULE_FLAG_FP64 is not set) or a ///< combination of ::ze_device_fp_flag_t. uint32_t maxArgumentsSize; ///< [out] Maximum kernel argument size that is supported. uint32_t printfBufferSize; ///< [out] Maximum size of internal buffer that holds output of printf ///< calls from kernel. ze_native_kernel_uuid_t nativeKernelSupported; ///< [out] Compatibility UUID of supported native kernel. ///< UUID may or may not be the same across driver release, devices, or ///< operating systems. ///< Application is responsible for ensuring UUID matches before creating ///< module using ///< previously created native kernel. } ze_device_module_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves module properties of the device /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pModuleProperties` ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetModuleProperties( ze_device_handle_t hDevice, ///< [in] handle of the device ze_device_module_properties_t* pModuleProperties ///< [in,out] query result for module properties ); /////////////////////////////////////////////////////////////////////////////// /// @brief Supported command queue group property flags typedef uint32_t ze_command_queue_group_property_flags_t; typedef enum _ze_command_queue_group_property_flag_t { ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE = ZE_BIT(0), ///< Command queue group supports enqueing compute commands. ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY = ZE_BIT(1), ///< Command queue group supports enqueing copy commands. ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS = ZE_BIT(2), ///< Command queue group supports cooperative kernels. ///< See ::zeCommandListAppendLaunchCooperativeKernel for more details. ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_METRICS = ZE_BIT(3), ///< Command queue groups supports metric queries. ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_* ENUMs } ze_command_queue_group_property_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Command queue group properties queried using /// ::zeDeviceGetCommandQueueGroupProperties typedef struct _ze_command_queue_group_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). 
ze_command_queue_group_property_flags_t flags; ///< [out] 0 (none) or a valid combination of ///< ::ze_command_queue_group_property_flag_t size_t maxMemoryFillPatternSize; ///< [out] maximum `pattern_size` supported by command queue group. ///< See ::zeCommandListAppendMemoryFill for more details. uint32_t numQueues; ///< [out] the number of physical engines within the group. } ze_command_queue_group_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves command queue group properties of the device. /// /// @details /// - Properties are reported for each physical command queue type supported /// by the device. /// - Multiple calls to this function will return properties in the same /// order. /// - The order in which the properties are returned defines the command /// queue group's ordinal. /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @remarks /// _Analogues_ /// - **vkGetPhysicalDeviceQueueFamilyProperties** /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pCount` ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetCommandQueueGroupProperties( ze_device_handle_t hDevice, ///< [in] handle of the device uint32_t* pCount, ///< [in,out] pointer to the number of command queue group properties. ///< if count is zero, then the driver shall update the value with the ///< total number of command queue group properties available. ///< if count is greater than the number of command queue group properties ///< available, then the driver shall update the value with the correct ///< number of command queue group properties available. 
ze_command_queue_group_properties_t* pCommandQueueGroupProperties ///< [in,out][optional][range(0, *pCount)] array of query results for ///< command queue group properties. ///< if count is less than the number of command queue group properties ///< available, then driver shall only retrieve that number of command ///< queue group properties. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Supported device memory property flags typedef uint32_t ze_device_memory_property_flags_t; typedef enum _ze_device_memory_property_flag_t { ZE_DEVICE_MEMORY_PROPERTY_FLAG_TBD = ZE_BIT(0), ///< reserved for future use ZE_DEVICE_MEMORY_PROPERTY_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_DEVICE_MEMORY_PROPERTY_FLAG_* ENUMs } ze_device_memory_property_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Device local memory properties queried using /// ::zeDeviceGetMemoryProperties typedef struct _ze_device_memory_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_device_memory_property_flags_t flags; ///< [out] 0 (none) or a valid combination of ///< ::ze_device_memory_property_flag_t uint32_t maxClockRate; ///< [out] Maximum clock rate for device memory. uint32_t maxBusWidth; ///< [out] Maximum bus width between device and memory. uint64_t totalSize; ///< [out] Total memory size in bytes that is available to the device. char name[ZE_MAX_DEVICE_NAME]; ///< [out] Memory name } ze_device_memory_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves local memory properties of the device. /// /// @details /// - Properties are reported for each physical memory type supported by the /// device. 
/// - Multiple calls to this function will return properties in the same /// order. /// - The order in which the properties are returned defines the device's /// local memory ordinal. /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @remarks /// _Analogues_ /// - clGetDeviceInfo /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pCount` ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetMemoryProperties( ze_device_handle_t hDevice, ///< [in] handle of the device uint32_t* pCount, ///< [in,out] pointer to the number of memory properties. ///< if count is zero, then the driver shall update the value with the ///< total number of memory properties available. ///< if count is greater than the number of memory properties available, ///< then the driver shall update the value with the correct number of ///< memory properties available. ze_device_memory_properties_t* pMemProperties ///< [in,out][optional][range(0, *pCount)] array of query results for ///< memory properties. ///< if count is less than the number of memory properties available, then ///< driver shall only retrieve that number of memory properties. 
); /////////////////////////////////////////////////////////////////////////////// /// @brief Memory access capability flags /// /// @details /// - Supported access capabilities for different types of memory /// allocations typedef uint32_t ze_memory_access_cap_flags_t; typedef enum _ze_memory_access_cap_flag_t { ZE_MEMORY_ACCESS_CAP_FLAG_RW = ZE_BIT(0), ///< Supports load/store access ZE_MEMORY_ACCESS_CAP_FLAG_ATOMIC = ZE_BIT(1), ///< Supports atomic access ZE_MEMORY_ACCESS_CAP_FLAG_CONCURRENT = ZE_BIT(2), ///< Supports concurrent access ZE_MEMORY_ACCESS_CAP_FLAG_CONCURRENT_ATOMIC = ZE_BIT(3), ///< Supports concurrent atomic access ZE_MEMORY_ACCESS_CAP_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_MEMORY_ACCESS_CAP_FLAG_* ENUMs } ze_memory_access_cap_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Device memory access properties queried using /// ::zeDeviceGetMemoryAccessProperties typedef struct _ze_device_memory_access_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_memory_access_cap_flags_t hostAllocCapabilities; ///< [out] host memory capabilities. ///< returns 0 (unsupported) or a combination of ::ze_memory_access_cap_flag_t. ze_memory_access_cap_flags_t deviceAllocCapabilities; ///< [out] device memory capabilities. ///< returns 0 (unsupported) or a combination of ::ze_memory_access_cap_flag_t. ze_memory_access_cap_flags_t sharedSingleDeviceAllocCapabilities; ///< [out] shared, single-device memory capabilities. ///< returns 0 (unsupported) or a combination of ::ze_memory_access_cap_flag_t. ze_memory_access_cap_flags_t sharedCrossDeviceAllocCapabilities; ///< [out] shared, cross-device memory capabilities. ///< returns 0 (unsupported) or a combination of ::ze_memory_access_cap_flag_t. 
ze_memory_access_cap_flags_t sharedSystemAllocCapabilities; ///< [out] shared, system memory capabilities. ///< returns 0 (unsupported) or a combination of ::ze_memory_access_cap_flag_t. } ze_device_memory_access_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves memory access properties of the device. /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @remarks /// _Analogues_ /// - clGetDeviceInfo /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pMemAccessProperties` ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetMemoryAccessProperties( ze_device_handle_t hDevice, ///< [in] handle of the device ze_device_memory_access_properties_t* pMemAccessProperties ///< [in,out] query result for memory access properties ); /////////////////////////////////////////////////////////////////////////////// /// @brief Supported cache control property flags typedef uint32_t ze_device_cache_property_flags_t; typedef enum _ze_device_cache_property_flag_t { ZE_DEVICE_CACHE_PROPERTY_FLAG_USER_CONTROL = ZE_BIT(0), ///< Device support User Cache Control (i.e. 
SLM section vs Generic Cache) ZE_DEVICE_CACHE_PROPERTY_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_DEVICE_CACHE_PROPERTY_FLAG_* ENUMs } ze_device_cache_property_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Device cache properties queried using ::zeDeviceGetCacheProperties typedef struct _ze_device_cache_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_device_cache_property_flags_t flags; ///< [out] 0 (none) or a valid combination of ///< ::ze_device_cache_property_flag_t size_t cacheSize; ///< [out] Per-cache size, in bytes } ze_device_cache_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves cache properties of the device /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @remarks /// _Analogues_ /// - clGetDeviceInfo /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pCount` ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetCacheProperties( ze_device_handle_t hDevice, ///< [in] handle of the device uint32_t* pCount, ///< [in,out] pointer to the number of cache properties. ///< if count is zero, then the driver shall update the value with the ///< total number of cache properties available. ///< if count is greater than the number of cache properties available, ///< then the driver shall update the value with the correct number of ///< cache properties available. 
ze_device_cache_properties_t* pCacheProperties ///< [in,out][optional][range(0, *pCount)] array of query results for cache properties. ///< if count is less than the number of cache properties available, then ///< driver shall only retrieve that number of cache properties. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Device image properties queried using ::zeDeviceGetImageProperties typedef struct _ze_device_image_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). uint32_t maxImageDims1D; ///< [out] Maximum image dimensions for 1D resources. if 0, then 1D images ///< are unsupported. uint32_t maxImageDims2D; ///< [out] Maximum image dimensions for 2D resources. if 0, then 2D images ///< are unsupported. uint32_t maxImageDims3D; ///< [out] Maximum image dimensions for 3D resources. if 0, then 3D images ///< are unsupported. uint64_t maxImageBufferSize; ///< [out] Maximum image buffer size in bytes. if 0, then buffer images are ///< unsupported. uint32_t maxImageArraySlices; ///< [out] Maximum image array slices. if 0, then image arrays are ///< unsupported. uint32_t maxSamplers; ///< [out] Max samplers that can be used in kernel. if 0, then sampling is ///< unsupported. uint32_t maxReadImageArgs; ///< [out] Returns the maximum number of simultaneous image objects that ///< can be read from by a kernel. if 0, then reading images is ///< unsupported. uint32_t maxWriteImageArgs; ///< [out] Returns the maximum number of simultaneous image objects that ///< can be written to by a kernel. if 0, then writing images is ///< unsupported. 
} ze_device_image_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves image properties of the device /// /// @details /// - See ::zeImageGetProperties for format-specific capabilities. /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pImageProperties` ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetImageProperties( ze_device_handle_t hDevice, ///< [in] handle of the device ze_device_image_properties_t* pImageProperties ///< [in,out] query result for image properties ); /////////////////////////////////////////////////////////////////////////////// /// @brief Device external memory import and export properties typedef struct _ze_device_external_memory_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_external_memory_type_flags_t memoryAllocationImportTypes; ///< [out] Supported external memory import types for memory allocations. ze_external_memory_type_flags_t memoryAllocationExportTypes; ///< [out] Supported external memory export types for memory allocations. ze_external_memory_type_flags_t imageImportTypes; ///< [out] Supported external memory import types for images. ze_external_memory_type_flags_t imageExportTypes; ///< [out] Supported external memory export types for images. 
} ze_device_external_memory_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves external memory import and export of the device /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pExternalMemoryProperties` ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetExternalMemoryProperties( ze_device_handle_t hDevice, ///< [in] handle of the device ze_device_external_memory_properties_t* pExternalMemoryProperties ///< [in,out] query result for external memory properties ); /////////////////////////////////////////////////////////////////////////////// /// @brief Supported device peer-to-peer property flags typedef uint32_t ze_device_p2p_property_flags_t; typedef enum _ze_device_p2p_property_flag_t { ZE_DEVICE_P2P_PROPERTY_FLAG_ACCESS = ZE_BIT(0), ///< Device supports access between peer devices. ZE_DEVICE_P2P_PROPERTY_FLAG_ATOMICS = ZE_BIT(1), ///< Device supports atomics between peer devices. ZE_DEVICE_P2P_PROPERTY_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_DEVICE_P2P_PROPERTY_FLAG_* ENUMs } ze_device_p2p_property_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Device peer-to-peer properties queried using /// ::zeDeviceGetP2PProperties typedef struct _ze_device_p2p_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). 
ze_device_p2p_property_flags_t flags; ///< [out] 0 (none) or a valid combination of ///< ::ze_device_p2p_property_flag_t } ze_device_p2p_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves peer-to-peer properties between one device and a peer /// devices /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDevice` /// + `nullptr == hPeerDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pP2PProperties` ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetP2PProperties( ze_device_handle_t hDevice, ///< [in] handle of the device performing the access ze_device_handle_t hPeerDevice, ///< [in] handle of the peer device with the allocation ze_device_p2p_properties_t* pP2PProperties ///< [in,out] Peer-to-Peer properties between source and peer device ); /////////////////////////////////////////////////////////////////////////////// /// @brief Queries if one device can directly access peer device allocations /// /// @details /// - Any device can access any other device within a node through a /// scale-up fabric. /// - The following are conditions for CanAccessPeer query. /// + If both device and peer device are the same then return true. /// + If both sub-device and peer sub-device are the same then return /// true. /// + If both are sub-devices and share the same parent device then /// return true. /// + If both device and remote device are connected by a direct or /// indirect scale-up fabric or over PCIe (same root complex or shared /// PCIe switch) then true. 
/// + If both sub-device and remote parent device (and vice-versa) are /// connected by a direct or indirect scale-up fabric or over PCIe /// (same root complex or shared PCIe switch) then true. /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDevice` /// + `nullptr == hPeerDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == value` ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceCanAccessPeer( ze_device_handle_t hDevice, ///< [in] handle of the device performing the access ze_device_handle_t hPeerDevice, ///< [in] handle of the peer device with the allocation ze_bool_t* value ///< [out] returned access capability ); /////////////////////////////////////////////////////////////////////////////// /// @brief Returns current status of the device. /// /// @details /// - Once a device is reset, this call will update the OS handle attached /// to the device handle. /// - The application may call this function from simultaneous threads with /// the same device handle. /// - The implementation of this function must be thread-safe. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDevice` /// - ::ZE_RESULT_SUCCESS /// + Device is available for use. /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// + Device is lost; must be reset for use. 
ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetStatus( ze_device_handle_t hDevice ///< [in] handle of the device ); /////////////////////////////////////////////////////////////////////////////// /// @brief Returns synchronized Host and device global timestamps. /// /// @details /// - The application may call this function from simultaneous threads with /// the same device handle. /// - The implementation of this function must be thread-safe. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == hostTimestamp` /// + `nullptr == deviceTimestamp` /// - ::ZE_RESULT_ERROR_UNSUPPORTED_FEATURE /// + The feature is not supported by the underlying platform. ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetGlobalTimestamps( ze_device_handle_t hDevice, ///< [in] handle of the device uint64_t* hostTimestamp, ///< [out] value of the Host's global timestamp that correlates with the ///< Device's global timestamp value. uint64_t* deviceTimestamp ///< [out] value of the Device's global timestamp that correlates with the ///< Host's global timestamp value. 
); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero APIs for Context #if !defined(__GNUC__) #pragma region context #endif /////////////////////////////////////////////////////////////////////////////// /// @brief Supported context creation flags typedef uint32_t ze_context_flags_t; typedef enum _ze_context_flag_t { ZE_CONTEXT_FLAG_TBD = ZE_BIT(0), ///< reserved for future use ZE_CONTEXT_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_CONTEXT_FLAG_* ENUMs } ze_context_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Context descriptor typedef struct _ze_context_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_context_flags_t flags; ///< [in] creation flags. ///< must be 0 (default) or a valid combination of ::ze_context_flag_t; ///< default behavior may use implicit driver-based heuristics. } ze_context_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Creates a context for the driver. /// /// @details /// - The application must only use the context for the driver which was /// provided during creation. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDriver` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == desc` /// + `nullptr == phContext` /// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION /// + `0x1 < desc->flags` ZE_APIEXPORT ze_result_t ZE_APICALL zeContextCreate( ze_driver_handle_t hDriver, ///< [in] handle of the driver object const ze_context_desc_t* desc, ///< [in] pointer to context descriptor ze_context_handle_t* phContext ///< [out] pointer to handle of context object created ); /////////////////////////////////////////////////////////////////////////////// /// @brief Creates a context for the driver. /// /// @details /// - The application must only use the context for the driver which was /// provided during creation. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDriver` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == desc` /// + `nullptr == phContext` /// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION /// + `0x1 < desc->flags` /// - ::ZE_RESULT_ERROR_INVALID_SIZE /// + `(nullptr == phDevices) && (0 < numDevices)` ZE_APIEXPORT ze_result_t ZE_APICALL zeContextCreateEx( ze_driver_handle_t hDriver, ///< [in] handle of the driver object const ze_context_desc_t* desc, ///< [in] pointer to context descriptor uint32_t numDevices, ///< [in][optional] number of device handles; must be 0 if `nullptr == ///< phDevices` ze_device_handle_t* phDevices, ///< [in][optional][range(0, numDevices)] array of device handles which ///< context has visibility. ///< if nullptr, then all devices and any sub-devices supported by the ///< driver instance are ///< visible to the context. ///< otherwise, the context only has visibility to the devices and any ///< sub-devices of the ///< devices in this array. ze_context_handle_t* phContext ///< [out] pointer to handle of context object created ); /////////////////////////////////////////////////////////////////////////////// /// @brief Destroys a context. /// /// @details /// - The application must ensure the device is not currently referencing /// the context before it is deleted. /// - The implementation of this function may immediately free all Host and /// Device allocations associated with this context. /// - The application must **not** call this function from simultaneous /// threads with the same context handle. /// - The implementation of this function must be thread-safe. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// - ::ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE ZE_APIEXPORT ze_result_t ZE_APICALL zeContextDestroy( ze_context_handle_t hContext ///< [in][release] handle of context object to destroy ); /////////////////////////////////////////////////////////////////////////////// /// @brief Returns current status of the context. /// /// @details /// - The application may call this function from simultaneous threads with /// the same context handle. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// - ::ZE_RESULT_SUCCESS /// + Context is available for use. /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// + Context is invalid; due to device lost or reset. ZE_APIEXPORT ze_result_t ZE_APICALL zeContextGetStatus( ze_context_handle_t hContext ///< [in] handle of context object ); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero APIs for Command Queue #if !defined(__GNUC__) #pragma region cmdqueue #endif /////////////////////////////////////////////////////////////////////////////// /// @brief Supported command queue flags typedef uint32_t ze_command_queue_flags_t; typedef enum _ze_command_queue_flag_t { ZE_COMMAND_QUEUE_FLAG_EXPLICIT_ONLY = ZE_BIT(0), ///< command queue should be optimized for submission to a single device engine. ///< driver **must** disable any implicit optimizations for distributing ///< work across multiple engines. 
///< this flag should be used when applications want full control over ///< multi-engine submission and scheduling. ///< This flag is **DEPRECATED** as flag ///< ${X}_COMMAND_LIST_FLAG_EXPLICIT_ONLY is **DEPRECATED**. ZE_COMMAND_QUEUE_FLAG_IN_ORDER = ZE_BIT(1), ///< To be used only when creating immediate command lists. Commands ///< appended to the immediate command ///< list are executed in-order, with driver implementation enforcing ///< dependencies between them. ///< Application is not required to have the signal event of a given ///< command being the wait event of ///< the next to define an in-order list, and application is allowed to ///< pass signal and wait events ///< to each appended command to implement more complex dependency graphs. ZE_COMMAND_QUEUE_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_COMMAND_QUEUE_FLAG_* ENUMs } ze_command_queue_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Supported command queue modes typedef enum _ze_command_queue_mode_t { ZE_COMMAND_QUEUE_MODE_DEFAULT = 0, ///< implicit default behavior; uses driver-based heuristics ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS = 1, ///< Device execution always completes immediately on execute; ///< Host thread is blocked using wait on implicit synchronization object ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS = 2, ///< Device execution is scheduled and will complete in future; ///< explicit synchronization object must be used to determine completeness ZE_COMMAND_QUEUE_MODE_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_COMMAND_QUEUE_MODE_* ENUMs } ze_command_queue_mode_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Supported command queue priorities typedef enum _ze_command_queue_priority_t { ZE_COMMAND_QUEUE_PRIORITY_NORMAL = 0, ///< [default] normal priority ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW = 1, ///< lower priority than normal ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_HIGH = 2, ///< 
higher priority than normal ZE_COMMAND_QUEUE_PRIORITY_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_COMMAND_QUEUE_PRIORITY_* ENUMs } ze_command_queue_priority_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Command Queue descriptor typedef struct _ze_command_queue_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). uint32_t ordinal; ///< [in] command queue group ordinal uint32_t index; ///< [in] command queue index within the group; ///< must be zero. ze_command_queue_flags_t flags; ///< [in] usage flags. ///< must be 0 (default) or a valid combination of ::ze_command_queue_flag_t; ///< default behavior may use implicit driver-based heuristics to balance ///< latency and throughput. ze_command_queue_mode_t mode; ///< [in] operation mode ze_command_queue_priority_t priority; ///< [in] priority } ze_command_queue_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Creates a command queue on the context. /// /// @details /// - A command queue represents a logical input stream to the device, tied /// to a physical input stream. /// - The application must only use the command queue for the device, or its /// sub-devices, which was provided during creation. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. 
/// /// @remarks /// _Analogues_ /// - **clCreateCommandQueue** /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == desc` /// + `nullptr == phCommandQueue` /// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION /// + `0x3 < desc->flags` /// + `::ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS < desc->mode` /// + `::ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_HIGH < desc->priority` ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandQueueCreate( ze_context_handle_t hContext, ///< [in] handle of the context object ze_device_handle_t hDevice, ///< [in] handle of the device object const ze_command_queue_desc_t* desc, ///< [in] pointer to command queue descriptor ze_command_queue_handle_t* phCommandQueue ///< [out] pointer to handle of command queue object created ); /////////////////////////////////////////////////////////////////////////////// /// @brief Destroys a command queue. /// /// @details /// - The application must destroy all fence handles created from the /// command queue before destroying the command queue itself /// - The application must ensure the device is not currently referencing /// the command queue before it is deleted /// - The implementation of this function may immediately free all Host and /// Device allocations associated with this command queue /// - The application must **not** call this function from simultaneous /// threads with the same command queue handle. /// - The implementation of this function must be thread-safe. 
/// /// @remarks /// _Analogues_ /// - **clReleaseCommandQueue** /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandQueue` /// - ::ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandQueueDestroy( ze_command_queue_handle_t hCommandQueue ///< [in][release] handle of command queue object to destroy ); /////////////////////////////////////////////////////////////////////////////// /// @brief Executes a command list in a command queue. /// /// @details /// - The command lists are submitted to the device in the order they are /// received, whether from multiple calls (on the same or different /// threads) or a single call with multiple command lists. /// - The application must ensure the command lists are accessible by the /// device on which the command queue was created. /// - The application must ensure the device is not currently referencing /// the command list since the implementation is allowed to modify the /// contents of the command list for submission. /// - The application must only execute command lists created with an /// identical command queue group ordinal to the command queue. /// - The application must use a fence created using the same command queue. /// - The application must ensure the command queue, command list and fence /// were created on the same context. /// - The application must ensure the command lists being executed are not /// immediate command lists. /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. 
/// /// @remarks /// _Analogues_ /// - vkQueueSubmit /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandQueue` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == phCommandLists` /// - ::ZE_RESULT_ERROR_INVALID_SIZE /// + `0 == numCommandLists` /// - ::ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandQueueExecuteCommandLists( ze_command_queue_handle_t hCommandQueue, ///< [in] handle of the command queue uint32_t numCommandLists, ///< [in] number of command lists to execute ze_command_list_handle_t* phCommandLists, ///< [in][range(0, numCommandLists)] list of handles of the command lists ///< to execute ze_fence_handle_t hFence ///< [in][optional] handle of the fence to signal on completion ); /////////////////////////////////////////////////////////////////////////////// /// @brief Synchronizes a command queue by waiting on the host. /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandQueue` /// - ::ZE_RESULT_NOT_READY /// + timeout expired ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandQueueSynchronize( ze_command_queue_handle_t hCommandQueue, ///< [in] handle of the command queue uint64_t timeout ///< [in] if non-zero, then indicates the maximum time (in nanoseconds) to ///< yield before returning ::ZE_RESULT_SUCCESS or ::ZE_RESULT_NOT_READY; ///< if zero, then immediately returns the status of the command queue; ///< if `UINT64_MAX`, then function will not return until complete or ///< device is lost. ///< Due to external dependencies, timeout may be rounded to the closest ///< value allowed by the accuracy of those dependencies. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Gets the command queue group ordinal. /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandQueue` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pOrdinal` ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandQueueGetOrdinal( ze_command_queue_handle_t hCommandQueue, ///< [in] handle of the command queue uint32_t* pOrdinal ///< [out] command queue group ordinal ); /////////////////////////////////////////////////////////////////////////////// /// @brief Gets the command queue index within the group. 
/// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandQueue` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pIndex` ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandQueueGetIndex( ze_command_queue_handle_t hCommandQueue, ///< [in] handle of the command queue uint32_t* pIndex ///< [out] command queue index within the group ); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero APIs for Command List #if !defined(__GNUC__) #pragma region cmdlist #endif /////////////////////////////////////////////////////////////////////////////// /// @brief Supported command list creation flags typedef uint32_t ze_command_list_flags_t; typedef enum _ze_command_list_flag_t { ZE_COMMAND_LIST_FLAG_RELAXED_ORDERING = ZE_BIT(0), ///< driver may reorder commands (e.g., kernels, copies) between barriers ///< and synchronization primitives. ///< using this flag may increase Host overhead of ::zeCommandListClose. ///< therefore, this flag should **not** be set for low-latency usage-models. ZE_COMMAND_LIST_FLAG_MAXIMIZE_THROUGHPUT = ZE_BIT(1), ///< driver may perform additional optimizations that increase execution ///< throughput. ///< using this flag may increase Host overhead of ::zeCommandListClose and ::zeCommandQueueExecuteCommandLists. ///< therefore, this flag should **not** be set for low-latency usage-models. ZE_COMMAND_LIST_FLAG_EXPLICIT_ONLY = ZE_BIT(2), ///< command list should be optimized for submission to a single command ///< queue and device engine. ///< driver **must** disable any implicit optimizations for distributing ///< work across multiple engines. 
///< this flag should be used when applications want full control over ///< multi-engine submission and scheduling. ///< This flag is **DEPRECATED** and implementations are not expected to ///< support this feature. ZE_COMMAND_LIST_FLAG_IN_ORDER = ZE_BIT(3), ///< commands appended to this command list are executed in-order, with ///< driver implementation ///< enforcing dependencies between them. Application is not required to ///< have the signal event ///< of a given command being the wait event of the next to define an ///< in-order list, and ///< application is allowed to pass signal and wait events to each appended ///< command to implement ///< more complex dependency graphs. Cannot be combined with ::ZE_COMMAND_LIST_FLAG_RELAXED_ORDERING. ZE_COMMAND_LIST_FLAG_EXP_CLONEABLE = ZE_BIT(4), ///< this command list may be cloned using ::zeCommandListCreateCloneExp ///< after ::zeCommandListClose. ZE_COMMAND_LIST_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_COMMAND_LIST_FLAG_* ENUMs } ze_command_list_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Command List descriptor typedef struct _ze_command_list_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). uint32_t commandQueueGroupOrdinal; ///< [in] command queue group ordinal to which this command list will be ///< submitted ze_command_list_flags_t flags; ///< [in] usage flags. ///< must be 0 (default) or a valid combination of ::ze_command_list_flag_t; ///< default behavior may use implicit driver-based heuristics to balance ///< latency and throughput. } ze_command_list_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Creates a command list on the context. 
/// /// @details /// - A command list represents a sequence of commands for execution on a /// command queue. /// - The command list is created in the 'open' state. /// - The application must only use the command list for the device, or its /// sub-devices, which was provided during creation. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == desc` /// + `nullptr == phCommandList` /// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION /// + `0x1f < desc->flags` ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListCreate( ze_context_handle_t hContext, ///< [in] handle of the context object ze_device_handle_t hDevice, ///< [in] handle of the device object const ze_command_list_desc_t* desc, ///< [in] pointer to command list descriptor ze_command_list_handle_t* phCommandList ///< [out] pointer to handle of command list object created ); /////////////////////////////////////////////////////////////////////////////// /// @brief Creates an immediate command list on the context. /// /// @details /// - An immediate command list is used for low-latency submission of /// commands. /// - An immediate command list creates an implicit command queue. /// - Immediate command lists must not be passed to /// ::zeCommandQueueExecuteCommandLists. /// - Commands appended into an immediate command list may execute /// synchronously, by blocking until the command is complete. /// - The command list is created in the 'open' state and never needs to be /// closed. 
/// - The application must only use the command list for the device, or its /// sub-devices, which was provided during creation. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == altdesc` /// + `nullptr == phCommandList` /// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION /// + `0x3 < altdesc->flags` /// + `::ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS < altdesc->mode` /// + `::ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_HIGH < altdesc->priority` ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListCreateImmediate( ze_context_handle_t hContext, ///< [in] handle of the context object ze_device_handle_t hDevice, ///< [in] handle of the device object const ze_command_queue_desc_t* altdesc, ///< [in] pointer to command queue descriptor ze_command_list_handle_t* phCommandList ///< [out] pointer to handle of command list object created ); /////////////////////////////////////////////////////////////////////////////// /// @brief Destroys a command list. /// /// @details /// - The application must ensure the device is not currently referencing /// the command list before it is deleted. /// - The implementation of this function may immediately free all Host and /// Device allocations associated with this command list. /// - The application must **not** call this function from simultaneous /// threads with the same command list handle. /// - The implementation of this function must be thread-safe. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// - ::ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListDestroy( ze_command_list_handle_t hCommandList ///< [in][release] handle of command list object to destroy ); /////////////////////////////////////////////////////////////////////////////// /// @brief Closes a command list; ready to be executed by a command queue. /// /// @details /// - The application must **not** call this function from simultaneous /// threads with the same command list handle. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListClose( ze_command_list_handle_t hCommandList ///< [in] handle of command list object to close ); /////////////////////////////////////////////////////////////////////////////// /// @brief Reset a command list to initial (empty) state; ready for appending /// commands. /// /// @details /// - The application must ensure the device is not currently referencing /// the command list before it is reset /// - The application must **not** call this function from simultaneous /// threads with the same command list handle. /// - The implementation of this function should be lock-free. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListReset( ze_command_list_handle_t hCommandList ///< [in] handle of command list object to reset ); /////////////////////////////////////////////////////////////////////////////// /// @brief Appends a memory write of the device's global timestamp value into a /// command list. /// /// @details /// - The application must ensure the events are accessible by the device on /// which the command list was created. /// - The timestamp frequency can be queried from the `timerResolution` /// member of ::ze_device_properties_t. /// - The number of valid bits in the timestamp value can be queried from /// the `timestampValidBits` member of ::ze_device_properties_t. /// - The application must ensure the memory pointed to by dstptr is /// accessible by the device on which the command list was created. /// - The application must ensure the command list and events were created, /// and the memory was allocated, on the same context. /// - The application must **not** call this function from simultaneous /// threads with the same command list handle. /// - The implementation of this function should be lock-free. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == dstptr` /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT /// - ::ZE_RESULT_ERROR_INVALID_SIZE /// + `(nullptr == phWaitEvents) && (0 < numWaitEvents)` ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendWriteGlobalTimestamp( ze_command_list_handle_t hCommandList, ///< [in] handle of the command list uint64_t* dstptr, ///< [in,out] pointer to memory where timestamp value will be written; must ///< be 8byte-aligned. ze_event_handle_t hSignalEvent, ///< [in][optional] handle of the event to signal on completion uint32_t numWaitEvents, ///< [in][optional] number of events to wait on before executing query; ///< must be 0 if `nullptr == phWaitEvents` ze_event_handle_t* phWaitEvents ///< [in][optional][range(0, numWaitEvents)] handle of the events to wait ///< on before executing query ); /////////////////////////////////////////////////////////////////////////////// /// @brief Synchronizes an immediate command list by waiting on the host for the /// completion of all commands previously submitted to it. /// /// @details /// - The application must call this function only with command lists /// created with ::zeCommandListCreateImmediate. /// - Waiting on one immediate command list shall not block the concurrent /// execution of commands appended to other /// immediate command lists created with either a different ordinal or /// different index. /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// - ::ZE_RESULT_NOT_READY /// + timeout expired /// - ::ZE_RESULT_ERROR_INVALID_ARGUMENT /// + handle does not correspond to an immediate command list ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListHostSynchronize( ze_command_list_handle_t hCommandList, ///< [in] handle of the immediate command list uint64_t timeout ///< [in] if non-zero, then indicates the maximum time (in nanoseconds) to ///< yield before returning ::ZE_RESULT_SUCCESS or ::ZE_RESULT_NOT_READY; ///< if zero, then immediately returns the status of the immediate command list; ///< if `UINT64_MAX`, then function will not return until complete or ///< device is lost. ///< Due to external dependencies, timeout may be rounded to the closest ///< value allowed by the accuracy of those dependencies. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Gets the handle of the device on which the command list was created. /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == phDevice` ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListGetDeviceHandle( ze_command_list_handle_t hCommandList, ///< [in] handle of the command list ze_device_handle_t* phDevice ///< [out] handle of the device on which the command list was created ); /////////////////////////////////////////////////////////////////////////////// /// @brief Gets the handle of the context on which the command list was created. /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == phContext` ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListGetContextHandle( ze_command_list_handle_t hCommandList, ///< [in] handle of the command list ze_context_handle_t* phContext ///< [out] handle of the context on which the command list was created ); /////////////////////////////////////////////////////////////////////////////// /// @brief Gets the command queue group ordinal to which the command list is /// submitted. /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pOrdinal` ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListGetOrdinal( ze_command_list_handle_t hCommandList, ///< [in] handle of the command list uint32_t* pOrdinal ///< [out] command queue group ordinal to which command list is submitted ); /////////////////////////////////////////////////////////////////////////////// /// @brief Gets the command queue index within the group to which the immediate /// command list is submitted. /// /// @details /// - The application must call this function only with command lists /// created with ::zeCommandListCreateImmediate. /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandListImmediate` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pIndex` /// - ::ZE_RESULT_ERROR_INVALID_ARGUMENT /// + handle does not correspond to an immediate command list ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListImmediateGetIndex( ze_command_list_handle_t hCommandListImmediate, ///< [in] handle of the immediate command list uint32_t* pIndex ///< [out] command queue index within the group to which the immediate ///< command list is submitted ); /////////////////////////////////////////////////////////////////////////////// /// @brief Query whether a command list is an immediate command list. 
/// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pIsImmediate` ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListIsImmediate( ze_command_list_handle_t hCommandList, ///< [in] handle of the command list ze_bool_t* pIsImmediate ///< [out] Boolean indicating whether the command list is an immediate ///< command list (true) or not (false) ); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero APIs for Barrier #if !defined(__GNUC__) #pragma region barrier #endif /////////////////////////////////////////////////////////////////////////////// /// @brief Appends an execution and global memory barrier into a command list. /// /// @details /// - The application must ensure the events are accessible by the device on /// which the command list was created. /// - If numWaitEvents is zero, then all previous commands, enqueued on same /// command queue, must complete prior to the execution of the barrier. /// This is not the case when numWaitEvents is non-zero. /// - If numWaitEvents is non-zero, then only all phWaitEvents must be /// signaled prior to the execution of the barrier. /// - This command blocks all following commands from beginning until the /// execution of the barrier completes. /// - The application must **not** call this function from simultaneous /// threads with the same command list handle. /// - The implementation of this function should be lock-free. 
/// /// @remarks /// _Analogues_ /// - **vkCmdPipelineBarrier** /// - clEnqueueBarrierWithWaitList /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT /// - ::ZE_RESULT_ERROR_INVALID_SIZE /// + `(nullptr == phWaitEvents) && (0 < numWaitEvents)` ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendBarrier( ze_command_list_handle_t hCommandList, ///< [in] handle of the command list ze_event_handle_t hSignalEvent, ///< [in][optional] handle of the event to signal on completion uint32_t numWaitEvents, ///< [in][optional] number of events to wait on before executing barrier; ///< must be 0 if `nullptr == phWaitEvents` ze_event_handle_t* phWaitEvents ///< [in][optional][range(0, numWaitEvents)] handle of the events to wait ///< on before executing barrier ); /////////////////////////////////////////////////////////////////////////////// /// @brief Appends a global memory ranges barrier into a command list. /// /// @details /// - The application must ensure the events are accessible by the device on /// which the command list was created. /// - If numWaitEvents is zero, then all previous commands are completed /// prior to the execution of the barrier. /// - If numWaitEvents is non-zero, then all phWaitEvents must be /// signaled prior to the execution of the barrier. /// - This command blocks all following commands from beginning until the /// execution of the barrier completes. /// - The application must **not** call this function from simultaneous /// threads with the same command list handle. /// - The implementation of this function should be lock-free. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pRangeSizes` /// + `nullptr == pRanges` /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT /// - ::ZE_RESULT_ERROR_INVALID_SIZE /// + `(nullptr == phWaitEvents) && (0 < numWaitEvents)` ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendMemoryRangesBarrier( ze_command_list_handle_t hCommandList, ///< [in] handle of the command list uint32_t numRanges, ///< [in] number of memory ranges const size_t* pRangeSizes, ///< [in][range(0, numRanges)] array of sizes of memory range const void** pRanges, ///< [in][range(0, numRanges)] array of memory ranges ze_event_handle_t hSignalEvent, ///< [in][optional] handle of the event to signal on completion uint32_t numWaitEvents, ///< [in][optional] number of events to wait on before executing barrier; ///< must be 0 if `nullptr == phWaitEvents` ze_event_handle_t* phWaitEvents ///< [in][optional][range(0, numWaitEvents)] handle of the events to wait ///< on before executing barrier ); /////////////////////////////////////////////////////////////////////////////// /// @brief Ensures in-bound writes to the device are globally observable. /// /// @details /// - This is a special-case system level barrier that can be used to ensure /// global observability of writes; /// typically needed after a producer (e.g., NIC) performs direct writes /// to the device's memory (e.g., Direct RDMA writes). /// This is typically required when the memory corresponding to the writes /// is subsequently accessed from a remote device. /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// + `nullptr == hDevice` ZE_APIEXPORT ze_result_t ZE_APICALL zeContextSystemBarrier( ze_context_handle_t hContext, ///< [in] handle of context object ze_device_handle_t hDevice ///< [in] handle of the device ); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero APIs for Copies #if !defined(__GNUC__) #pragma region copy #endif /////////////////////////////////////////////////////////////////////////////// /// @brief Copies host, device, or shared memory. /// /// @details /// - The application must ensure the memory pointed to by dstptr and srcptr /// is accessible by the device on which the command list was created. /// - The implementation must not access the memory pointed to by dstptr and /// srcptr as they are free to be modified by either the Host or device up /// until execution. /// - The application must ensure the events are accessible by the device on /// which the command list was created. /// - The application must ensure the command list and events were created, /// and the memory was allocated, on the same context. /// - The application must **not** call this function from simultaneous /// threads with the same command list handle. /// - The implementation of this function should be lock-free. 
/// /// @remarks /// _Analogues_ /// - **clEnqueueCopyBuffer** /// - **clEnqueueReadBuffer** /// - **clEnqueueWriteBuffer** /// - **clEnqueueSVMMemcpy** /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == dstptr` /// + `nullptr == srcptr` /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT /// - ::ZE_RESULT_ERROR_INVALID_SIZE /// + `(nullptr == phWaitEvents) && (0 < numWaitEvents)` ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendMemoryCopy( ze_command_list_handle_t hCommandList, ///< [in] handle of command list void* dstptr, ///< [in] pointer to destination memory to copy to const void* srcptr, ///< [in] pointer to source memory to copy from size_t size, ///< [in] size in bytes to copy ze_event_handle_t hSignalEvent, ///< [in][optional] handle of the event to signal on completion uint32_t numWaitEvents, ///< [in][optional] number of events to wait on before launching; must be 0 ///< if `nullptr == phWaitEvents` ze_event_handle_t* phWaitEvents ///< [in][optional][range(0, numWaitEvents)] handle of the events to wait ///< on before launching ); /////////////////////////////////////////////////////////////////////////////// /// @brief Initializes host, device, or shared memory. /// /// @details /// - The application must ensure the memory pointed to by dstptr is /// accessible by the device on which the command list was created. /// - The implementation must not access the memory pointed to by dstptr as /// it is free to be modified by either the Host or device up until /// execution. /// - The value to initialize memory to is described by the pattern and the /// pattern size. 
/// - The pattern size must be a power-of-two and less than or equal to the /// `maxMemoryFillPatternSize` member of /// ::ze_command_queue_group_properties_t. /// - The application must ensure the events are accessible by the device on /// which the command list was created. /// - The application must ensure the command list and events were created, /// and the memory was allocated, on the same context. /// - The application must **not** call this function from simultaneous /// threads with the same command list handle. /// - The implementation of this function should be lock-free. /// /// @remarks /// _Analogues_ /// - **clEnqueueFillBuffer** /// - **clEnqueueSVMMemFill** /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == ptr` /// + `nullptr == pattern` /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT /// - ::ZE_RESULT_ERROR_INVALID_SIZE /// + `(nullptr == phWaitEvents) && (0 < numWaitEvents)` ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendMemoryFill( ze_command_list_handle_t hCommandList, ///< [in] handle of command list void* ptr, ///< [in] pointer to memory to initialize const void* pattern, ///< [in] pointer to value to initialize memory to size_t pattern_size, ///< [in] size in bytes of the value to initialize memory to size_t size, ///< [in] size in bytes to initialize ze_event_handle_t hSignalEvent, ///< [in][optional] handle of the event to signal on completion uint32_t numWaitEvents, ///< [in][optional] number of events to wait on before launching; must be 0 ///< if `nullptr == phWaitEvents` ze_event_handle_t* phWaitEvents ///< [in][optional][range(0, numWaitEvents)] handle of the events to wait ///< on before launching ); 
/////////////////////////////////////////////////////////////////////////////// /// @brief Copy region descriptor typedef struct _ze_copy_region_t { uint32_t originX; ///< [in] The origin x offset for region in bytes uint32_t originY; ///< [in] The origin y offset for region in rows uint32_t originZ; ///< [in] The origin z offset for region in slices uint32_t width; ///< [in] The region width relative to origin in bytes uint32_t height; ///< [in] The region height relative to origin in rows uint32_t depth; ///< [in] The region depth relative to origin in slices. Set this to 0 for ///< 2D copy. } ze_copy_region_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Copies a region from a 2D or 3D array of host, device, or shared /// memory. /// /// @details /// - The application must ensure the memory pointed to by dstptr and srcptr /// is accessible by the device on which the command list was created. /// - The implementation must not access the memory pointed to by dstptr and /// srcptr as they are free to be modified by either the Host or device up /// until execution. /// - The region width, height, and depth for both src and dst must be same. /// The origins can be different. /// - The src and dst regions cannot be overlapping. /// - The application must ensure the events are accessible by the device on /// which the command list was created. /// - The application must ensure the command list and events were created, /// and the memory was allocated, on the same context. /// - The application must **not** call this function from simultaneous /// threads with the same command list handle. /// - The implementation of this function should be lock-free. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == dstptr` /// + `nullptr == dstRegion` /// + `nullptr == srcptr` /// + `nullptr == srcRegion` /// - ::ZE_RESULT_ERROR_OVERLAPPING_REGIONS /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT /// - ::ZE_RESULT_ERROR_INVALID_SIZE /// + `(nullptr == phWaitEvents) && (0 < numWaitEvents)` ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendMemoryCopyRegion( ze_command_list_handle_t hCommandList, ///< [in] handle of command list void* dstptr, ///< [in] pointer to destination memory to copy to const ze_copy_region_t* dstRegion, ///< [in] pointer to destination region to copy to uint32_t dstPitch, ///< [in] destination pitch in bytes uint32_t dstSlicePitch, ///< [in] destination slice pitch in bytes. This is required for 3D region ///< copies where the `depth` member of ::ze_copy_region_t is not 0, ///< otherwise it's ignored. const void* srcptr, ///< [in] pointer to source memory to copy from const ze_copy_region_t* srcRegion, ///< [in] pointer to source region to copy from uint32_t srcPitch, ///< [in] source pitch in bytes uint32_t srcSlicePitch, ///< [in] source slice pitch in bytes. This is required for 3D region ///< copies where the `depth` member of ::ze_copy_region_t is not 0, ///< otherwise it's ignored. 
ze_event_handle_t hSignalEvent, ///< [in][optional] handle of the event to signal on completion uint32_t numWaitEvents, ///< [in][optional] number of events to wait on before launching; must be 0 ///< if `nullptr == phWaitEvents` ze_event_handle_t* phWaitEvents ///< [in][optional][range(0, numWaitEvents)] handle of the events to wait ///< on before launching ); /////////////////////////////////////////////////////////////////////////////// /// @brief Copies host, device, or shared memory from another context. /// /// @details /// - The current active and source context must be from the same driver. /// - The application must ensure the memory pointed to by dstptr and srcptr /// is accessible by the device on which the command list was created. /// - The implementation must not access the memory pointed to by dstptr and /// srcptr as they are free to be modified by either the Host or device up /// until execution. /// - The application must ensure the events are accessible by the device on /// which the command list was created. /// - The application must ensure the command list and events were created, /// and the memory was allocated, on the same context. /// - The application must **not** call this function from simultaneous /// threads with the same command list handle. /// - The implementation of this function should be lock-free. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// + `nullptr == hContextSrc` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == dstptr` /// + `nullptr == srcptr` /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT /// - ::ZE_RESULT_ERROR_INVALID_SIZE /// + `(nullptr == phWaitEvents) && (0 < numWaitEvents)` ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendMemoryCopyFromContext( ze_command_list_handle_t hCommandList, ///< [in] handle of command list void* dstptr, ///< [in] pointer to destination memory to copy to ze_context_handle_t hContextSrc, ///< [in] handle of source context object const void* srcptr, ///< [in] pointer to source memory to copy from size_t size, ///< [in] size in bytes to copy ze_event_handle_t hSignalEvent, ///< [in][optional] handle of the event to signal on completion uint32_t numWaitEvents, ///< [in][optional] number of events to wait on before launching; must be 0 ///< if `nullptr == phWaitEvents` ze_event_handle_t* phWaitEvents ///< [in][optional][range(0, numWaitEvents)] handle of the events to wait ///< on before launching ); /////////////////////////////////////////////////////////////////////////////// /// @brief Copies an image. /// /// @details /// - The application must ensure the image and events are accessible by the /// device on which the command list was created. /// - The application must ensure the image format descriptors for both /// source and destination images are the same. /// - The application must ensure the command list, images and events were /// created on the same context. /// - The application must **not** call this function from simultaneous /// threads with the same command list handle. /// - The implementation of this function should be lock-free. 
/// /// @remarks /// _Analogues_ /// - **clEnqueueCopyImage** /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// + `nullptr == hDstImage` /// + `nullptr == hSrcImage` /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT /// - ::ZE_RESULT_ERROR_INVALID_SIZE /// + `(nullptr == phWaitEvents) && (0 < numWaitEvents)` ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendImageCopy( ze_command_list_handle_t hCommandList, ///< [in] handle of command list ze_image_handle_t hDstImage, ///< [in] handle of destination image to copy to ze_image_handle_t hSrcImage, ///< [in] handle of source image to copy from ze_event_handle_t hSignalEvent, ///< [in][optional] handle of the event to signal on completion uint32_t numWaitEvents, ///< [in][optional] number of events to wait on before launching; must be 0 ///< if `nullptr == phWaitEvents` ze_event_handle_t* phWaitEvents ///< [in][optional][range(0, numWaitEvents)] handle of the events to wait ///< on before launching ); /////////////////////////////////////////////////////////////////////////////// /// @brief Region descriptor typedef struct _ze_image_region_t { uint32_t originX; ///< [in] The origin x offset for region in pixels uint32_t originY; ///< [in] The origin y offset for region in pixels uint32_t originZ; ///< [in] The origin z offset for region in pixels uint32_t width; ///< [in] The region width relative to origin in pixels uint32_t height; ///< [in] The region height relative to origin in pixels uint32_t depth; ///< [in] The region depth relative to origin. For 1D or 2D images, set ///< this to 1. } ze_image_region_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Copies a region of an image to another image. 
/// /// @details /// - The application must ensure the image and events are accessible by the /// device on which the command list was created. /// - The region width and height for both src and dst must be same. The /// origins can be different. /// - The src and dst regions cannot be overlapping. /// - The application must ensure the image format descriptors for both /// source and destination images are the same. /// - The application must ensure the command list, images and events were /// created, and the memory was allocated, on the same context. /// - The application must **not** call this function from simultaneous /// threads with the same command list handle. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// + `nullptr == hDstImage` /// + `nullptr == hSrcImage` /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT /// - ::ZE_RESULT_ERROR_OVERLAPPING_REGIONS /// - ::ZE_RESULT_ERROR_INVALID_SIZE /// + `(nullptr == phWaitEvents) && (0 < numWaitEvents)` ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendImageCopyRegion( ze_command_list_handle_t hCommandList, ///< [in] handle of command list ze_image_handle_t hDstImage, ///< [in] handle of destination image to copy to ze_image_handle_t hSrcImage, ///< [in] handle of source image to copy from const ze_image_region_t* pDstRegion, ///< [in][optional] destination region descriptor const ze_image_region_t* pSrcRegion, ///< [in][optional] source region descriptor ze_event_handle_t hSignalEvent, ///< [in][optional] handle of the event to signal on completion uint32_t numWaitEvents, ///< [in][optional] number of events to wait on before launching; must be 0 ///< if `nullptr == phWaitEvents` ze_event_handle_t* 
phWaitEvents ///< [in][optional][range(0, numWaitEvents)] handle of the events to wait ///< on before launching ); /////////////////////////////////////////////////////////////////////////////// /// @brief Copies from an image to device or shared memory. /// /// @details /// - The application must ensure the memory pointed to by dstptr is /// accessible by the device on which the command list was created. /// - The implementation must not access the memory pointed to by dstptr as /// it is free to be modified by either the Host or device up until /// execution. /// - The application must ensure the image and events are accessible by the /// device on which the command list was created. /// - The application must ensure the image format descriptor for the source /// image is a single-planar format. /// - The application must ensure the command list, image and events were /// created, and the memory was allocated, on the same context. /// - The application must **not** call this function from simultaneous /// threads with the same command list handle. /// - The implementation of this function should be lock-free. 
/// /// @remarks /// _Analogues_ /// - clEnqueueReadImage /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// + `nullptr == hSrcImage` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == dstptr` /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT /// - ::ZE_RESULT_ERROR_INVALID_SIZE /// + `(nullptr == phWaitEvents) && (0 < numWaitEvents)` ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendImageCopyToMemory( ze_command_list_handle_t hCommandList, ///< [in] handle of command list void* dstptr, ///< [in] pointer to destination memory to copy to ze_image_handle_t hSrcImage, ///< [in] handle of source image to copy from const ze_image_region_t* pSrcRegion, ///< [in][optional] source region descriptor ze_event_handle_t hSignalEvent, ///< [in][optional] handle of the event to signal on completion uint32_t numWaitEvents, ///< [in][optional] number of events to wait on before launching; must be 0 ///< if `nullptr == phWaitEvents` ze_event_handle_t* phWaitEvents ///< [in][optional][range(0, numWaitEvents)] handle of the events to wait ///< on before launching ); /////////////////////////////////////////////////////////////////////////////// /// @brief Copies to an image from device or shared memory. /// /// @details /// - The application must ensure the memory pointed to by srcptr is /// accessible by the device on which the command list was created. /// - The implementation must not access the memory pointed to by srcptr as /// it is free to be modified by either the Host or device up until /// execution. /// - The application must ensure the image and events are accessible by the /// device on which the command list was created. 
/// - The application must ensure the image format descriptor for the /// destination image is a single-planar format. /// - The application must ensure the command list, image and events were /// created, and the memory was allocated, on the same context. /// - The application must **not** call this function from simultaneous /// threads with the same command list handle. /// - The implementation of this function should be lock-free. /// /// @remarks /// _Analogues_ /// - clEnqueueWriteImage /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// + `nullptr == hDstImage` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == srcptr` /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT /// - ::ZE_RESULT_ERROR_INVALID_SIZE /// + `(nullptr == phWaitEvents) && (0 < numWaitEvents)` ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendImageCopyFromMemory( ze_command_list_handle_t hCommandList, ///< [in] handle of command list ze_image_handle_t hDstImage, ///< [in] handle of destination image to copy to const void* srcptr, ///< [in] pointer to source memory to copy from const ze_image_region_t* pDstRegion, ///< [in][optional] destination region descriptor ze_event_handle_t hSignalEvent, ///< [in][optional] handle of the event to signal on completion uint32_t numWaitEvents, ///< [in][optional] number of events to wait on before launching; must be 0 ///< if `nullptr == phWaitEvents` ze_event_handle_t* phWaitEvents ///< [in][optional][range(0, numWaitEvents)] handle of the events to wait ///< on before launching ); /////////////////////////////////////////////////////////////////////////////// /// @brief Asynchronously prefetches shared memory to the device associated with /// the specified command list /// /// @details /// - This is a 
hint to improve performance only and is not required for /// correctness. /// - Only prefetching to the device associated with the specified command /// list is supported. /// Prefetching to the host or to a peer device is not supported. /// - Prefetching may not be supported for all allocation types for all devices. /// If memory prefetching is not supported for the specified memory range /// the prefetch hint may be ignored. /// - Prefetching may only be supported at a device-specific granularity, /// such as at a page boundary. /// In this case, the memory range may be expanded such that the start and /// end of the range satisfy granularity requirements. /// - The application must ensure the memory pointed to by ptr is accessible /// by the device on which the command list was created. /// - The application must ensure the command list was created, and the /// memory was allocated, on the same context. /// - The application must **not** call this function from simultaneous /// threads with the same command list handle. /// - The implementation of this function should be lock-free. 
/// /// @remarks /// _Analogues_ /// - clEnqueueSVMMigrateMem /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == ptr` ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendMemoryPrefetch( ze_command_list_handle_t hCommandList, ///< [in] handle of command list const void* ptr, ///< [in] pointer to start of the memory range to prefetch size_t size ///< [in] size in bytes of the memory range to prefetch ); /////////////////////////////////////////////////////////////////////////////// /// @brief Supported memory advice hints typedef enum _ze_memory_advice_t { ZE_MEMORY_ADVICE_SET_READ_MOSTLY = 0, ///< hint that memory will be read from frequently and written to rarely ZE_MEMORY_ADVICE_CLEAR_READ_MOSTLY = 1, ///< removes the effect of ::ZE_MEMORY_ADVICE_SET_READ_MOSTLY ZE_MEMORY_ADVICE_SET_PREFERRED_LOCATION = 2, ///< hint that the preferred memory location is the specified device ZE_MEMORY_ADVICE_CLEAR_PREFERRED_LOCATION = 3, ///< removes the effect of ::ZE_MEMORY_ADVICE_SET_PREFERRED_LOCATION ZE_MEMORY_ADVICE_SET_NON_ATOMIC_MOSTLY = 4, ///< hints that memory will mostly be accessed non-atomically ZE_MEMORY_ADVICE_CLEAR_NON_ATOMIC_MOSTLY = 5, ///< removes the effect of ::ZE_MEMORY_ADVICE_SET_NON_ATOMIC_MOSTLY ZE_MEMORY_ADVICE_BIAS_CACHED = 6, ///< hints that memory should be cached ZE_MEMORY_ADVICE_BIAS_UNCACHED = 7, ///< hints that memory should not be cached ZE_MEMORY_ADVICE_SET_SYSTEM_MEMORY_PREFERRED_LOCATION = 8, ///< hint that the preferred memory location is host memory ZE_MEMORY_ADVICE_CLEAR_SYSTEM_MEMORY_PREFERRED_LOCATION = 9, ///< removes the effect of ///< ::ZE_MEMORY_ADVICE_SET_SYSTEM_MEMORY_PREFERRED_LOCATION ZE_MEMORY_ADVICE_FORCE_UINT32 = 0x7fffffff, ///< Value 
marking end of ZE_MEMORY_ADVICE_* ENUMs } ze_memory_advice_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Provides advice about the use of a shared memory range /// /// @details /// - Memory advice is a performance hint only and is not required for /// functional correctness. /// - Memory advice can be used to override driver heuristics to explicitly /// control shared memory behavior. /// - Not all memory advice hints may be supported for all allocation types /// for all devices. /// If a memory advice hint is not supported by the device it will be ignored. /// - Memory advice may only be supported at a device-specific granularity, /// such as at a page boundary. /// In this case, the memory range may be expanded such that the start and /// end of the range satisfy granularity requirements. /// - The application must ensure the memory pointed to by ptr is accessible /// by the device on which the command list was created. /// - The application must ensure the command list was created, and memory /// was allocated, on the same context. /// - The application must **not** call this function from simultaneous /// threads with the same command list handle. /// - The implementation of this function should be lock-free. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == ptr` /// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION /// + `::ZE_MEMORY_ADVICE_CLEAR_SYSTEM_MEMORY_PREFERRED_LOCATION < advice` ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendMemAdvise( ze_command_list_handle_t hCommandList, ///< [in] handle of command list ze_device_handle_t hDevice, ///< [in] device associated with the memory advice const void* ptr, ///< [in] Pointer to the start of the memory range size_t size, ///< [in] Size in bytes of the memory range ze_memory_advice_t advice ///< [in] Memory advice for the memory range ); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero APIs for Event #if !defined(__GNUC__) #pragma region event #endif /////////////////////////////////////////////////////////////////////////////// /// @brief Supported event pool creation flags typedef uint32_t ze_event_pool_flags_t; typedef enum _ze_event_pool_flag_t { ZE_EVENT_POOL_FLAG_HOST_VISIBLE = ZE_BIT(0), ///< signals and waits are also visible to host ZE_EVENT_POOL_FLAG_IPC = ZE_BIT(1), ///< signals and waits may be shared across processes ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP = ZE_BIT(2), ///< Indicates all events in pool will contain kernel timestamps ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP = ZE_BIT(3), ///< Indicates all events in pool will contain kernel timestamps ///< synchronized to host time domain; cannot be combined with ///< ::ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP ZE_EVENT_POOL_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_EVENT_POOL_FLAG_* ENUMs } ze_event_pool_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief 
Event pool descriptor typedef struct _ze_event_pool_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_event_pool_flags_t flags; ///< [in] creation flags. ///< must be 0 (default) or a valid combination of ::ze_event_pool_flag_t; ///< default behavior is signals and waits are visible to the entire device ///< and peer devices. uint32_t count; ///< [in] number of events within the pool; must be greater than 0 } ze_event_pool_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Creates a pool of events on the context. /// /// @details /// - The application must only use events within the pool for the /// device(s), or their sub-devices, which were provided during creation. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == desc` /// + `nullptr == phEventPool` /// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION /// + `0xf < desc->flags` /// - ::ZE_RESULT_ERROR_INVALID_SIZE /// + `0 == desc->count` /// + `(nullptr == phDevices) && (0 < numDevices)` ZE_APIEXPORT ze_result_t ZE_APICALL zeEventPoolCreate( ze_context_handle_t hContext, ///< [in] handle of the context object const ze_event_pool_desc_t* desc, ///< [in] pointer to event pool descriptor uint32_t numDevices, ///< [in][optional] number of device handles; must be 0 if `nullptr == ///< phDevices` ze_device_handle_t* phDevices, ///< [in][optional][range(0, numDevices)] array of device handles which ///< have visibility to the event pool. ///< if nullptr, then event pool is visible to all devices supported by the ///< driver instance. ze_event_pool_handle_t* phEventPool ///< [out] pointer handle of event pool object created ); /////////////////////////////////////////////////////////////////////////////// /// @brief Deletes an event pool object. /// /// @details /// - The application must destroy all event handles created from the pool /// before destroying the pool itself. /// - The application must ensure the device is not currently referencing /// the any event within the pool before it is deleted. /// - The implementation of this function may immediately free all Host and /// Device allocations associated with this event pool. /// - The application must **not** call this function from simultaneous /// threads with the same event pool handle. /// - The implementation of this function must be thread-safe. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hEventPool` /// - ::ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE ZE_APIEXPORT ze_result_t ZE_APICALL zeEventPoolDestroy( ze_event_pool_handle_t hEventPool ///< [in][release] handle of event pool object to destroy ); /////////////////////////////////////////////////////////////////////////////// /// @brief Supported event scope flags typedef uint32_t ze_event_scope_flags_t; typedef enum _ze_event_scope_flag_t { ZE_EVENT_SCOPE_FLAG_SUBDEVICE = ZE_BIT(0), ///< cache hierarchies are flushed or invalidated sufficient for local ///< sub-device access ZE_EVENT_SCOPE_FLAG_DEVICE = ZE_BIT(1), ///< cache hierarchies are flushed or invalidated sufficient for global ///< device access and peer device access ZE_EVENT_SCOPE_FLAG_HOST = ZE_BIT(2), ///< cache hierarchies are flushed or invalidated sufficient for device and ///< host access ZE_EVENT_SCOPE_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_EVENT_SCOPE_FLAG_* ENUMs } ze_event_scope_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Event descriptor typedef struct _ze_event_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). uint32_t index; ///< [in] index of the event within the pool; must be less than the count ///< specified during pool creation ze_event_scope_flags_t signal; ///< [in] defines the scope of relevant cache hierarchies to flush on a ///< signal action before the event is triggered. 
///< must be 0 (default) or a valid combination of ::ze_event_scope_flag_t; ///< default behavior is synchronization within the command list only, no ///< additional cache hierarchies are flushed. ze_event_scope_flags_t wait; ///< [in] defines the scope of relevant cache hierarchies to invalidate on ///< a wait action after the event is complete. ///< must be 0 (default) or a valid combination of ::ze_event_scope_flag_t; ///< default behavior is synchronization within the command list only, no ///< additional cache hierarchies are invalidated. } ze_event_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Creates an event from the pool. /// /// @details /// - An event is used to communicate fine-grain host-to-device, /// device-to-host or device-to-device dependencies have completed. /// - The application must ensure the location in the pool is not being used /// by another event. /// - The application must **not** call this function from simultaneous /// threads with the same event pool handle. /// - The implementation of this function should be lock-free. 
/// /// @remarks /// _Analogues_ /// - **clCreateUserEvent** /// - vkCreateEvent /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hEventPool` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == desc` /// + `nullptr == phEvent` /// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION /// + `0x7 < desc->signal` /// + `0x7 < desc->wait` ZE_APIEXPORT ze_result_t ZE_APICALL zeEventCreate( ze_event_pool_handle_t hEventPool, ///< [in] handle of the event pool const ze_event_desc_t* desc, ///< [in] pointer to event descriptor ze_event_handle_t* phEvent ///< [out] pointer to handle of event object created ); /////////////////////////////////////////////////////////////////////////////// /// @brief Deletes an event object. /// /// @details /// - The application must ensure the device is not currently referencing /// the event before it is deleted. /// - The implementation of this function may immediately free all Host and /// Device allocations associated with this event. /// - The application must **not** call this function from simultaneous /// threads with the same event handle. /// - The implementation of this function should be lock-free. 
/// /// @remarks /// _Analogues_ /// - **clReleaseEvent** /// - vkDestroyEvent /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hEvent` /// - ::ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE ZE_APIEXPORT ze_result_t ZE_APICALL zeEventDestroy( ze_event_handle_t hEvent ///< [in][release] handle of event object to destroy ); /////////////////////////////////////////////////////////////////////////////// /// @brief Gets an IPC event pool handle for the specified event handle that can /// be shared with another process. /// /// @details /// - Event pool must have been created with ::ZE_EVENT_POOL_FLAG_IPC. /// - The application may call this function from simultaneous threads. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hEventPool` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == phIpc` /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT ZE_APIEXPORT ze_result_t ZE_APICALL zeEventPoolGetIpcHandle( ze_event_pool_handle_t hEventPool, ///< [in] handle of event pool object ze_ipc_event_pool_handle_t* phIpc ///< [out] Returned IPC event handle ); /////////////////////////////////////////////////////////////////////////////// /// @brief Returns an IPC event pool handle to the driver /// /// @details /// - This call must be used for IPC handles previously obtained with /// ::zeEventPoolGetIpcHandle. /// - Upon call, driver may release any underlying resources associated with /// the IPC handle. /// For instance, it may close the file descriptor contained in the IPC /// handle, if such type of handle is being used by the driver. 
/// - This call does not destroy the original event pool for which the IPC /// handle was created. /// - This function may **not** be called from simultaneous threads with the /// same IPC handle. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` ZE_APIEXPORT ze_result_t ZE_APICALL zeEventPoolPutIpcHandle( ze_context_handle_t hContext, ///< [in] handle of the context object associated with the IPC event pool ///< handle ze_ipc_event_pool_handle_t hIpc ///< [in] IPC event pool handle ); /////////////////////////////////////////////////////////////////////////////// /// @brief Opens an IPC event pool handle to retrieve an event pool handle from /// another process. /// /// @details /// - Multiple calls to this function with the same IPC handle will return /// unique event pool handles. /// - The event handle in this process should not be freed with /// ::zeEventPoolDestroy, but rather with ::zeEventPoolCloseIpcHandle. /// - If the original event pool has been created for a device containing a /// number of sub-devices, then the event pool /// returned by this call may be used on a device containing the same /// number of sub-devices, or on any of /// those sub-devices. /// - However, if the original event pool has been created for a sub-device, /// then the event pool returned by this call /// cannot be used on a device containing any number of sub-devices, and /// must be used only in a sub-device. This ensures /// functional correctness for any implementation or optimizations the /// underlying Level Zero driver may do on /// event pools and events. /// - The application may call this function from simultaneous threads. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == phEventPool` ZE_APIEXPORT ze_result_t ZE_APICALL zeEventPoolOpenIpcHandle( ze_context_handle_t hContext, ///< [in] handle of the context object to associate with the IPC event pool ///< handle ze_ipc_event_pool_handle_t hIpc, ///< [in] IPC event pool handle ze_event_pool_handle_t* phEventPool ///< [out] pointer handle of event pool object created ); /////////////////////////////////////////////////////////////////////////////// /// @brief Closes an IPC event handle in the current process. /// /// @details /// - Closes an IPC event handle by destroying events that were opened in /// this process using ::zeEventPoolOpenIpcHandle. /// - The application must **not** call this function from simultaneous /// threads with the same event pool handle. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hEventPool` ZE_APIEXPORT ze_result_t ZE_APICALL zeEventPoolCloseIpcHandle( ze_event_pool_handle_t hEventPool ///< [in][release] handle of event pool object ); /////////////////////////////////////////////////////////////////////////////// /// @brief Appends a signal of the event from the device into a command list. /// /// @details /// - The application must ensure the events are accessible by the device on /// which the command list was created. 
/// - The duration of an event created from an event pool that was created /// using ::ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP or /// ::ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP flags is undefined. /// However, for consistency and orthogonality the event will report /// correctly as signaled when used by other event API functionality. /// - The application must ensure the command list and events were created /// on the same context. /// - The application must **not** call this function from simultaneous /// threads with the same command list handle. /// - The implementation of this function should be lock-free. /// /// @remarks /// _Analogues_ /// - **clSetUserEventStatus** /// - vkCmdSetEvent /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// + `nullptr == hEvent` /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendSignalEvent( ze_command_list_handle_t hCommandList, ///< [in] handle of the command list ze_event_handle_t hEvent ///< [in] handle of the event ); /////////////////////////////////////////////////////////////////////////////// /// @brief Appends wait on event(s) on the device into a command list. /// /// @details /// - The application must ensure the events are accessible by the device on /// which the command list was created. /// - The application must ensure the command list and events were created /// on the same context. /// - The application must **not** call this function from simultaneous /// threads with the same command list handle. /// - The implementation of this function should be lock-free. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == phEvents` /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendWaitOnEvents( ze_command_list_handle_t hCommandList, ///< [in] handle of the command list uint32_t numEvents, ///< [in] number of events to wait on before continuing ze_event_handle_t* phEvents ///< [in][range(0, numEvents)] handles of the events to wait on before ///< continuing ); /////////////////////////////////////////////////////////////////////////////// /// @brief Signals a event from host. /// /// @details /// - The duration of an event created from an event pool that was created /// using ::ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP or /// ::ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP flags is undefined. /// However, for consistency and orthogonality the event will report /// correctly as signaled when used by other event API functionality. /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. 
/// /// @remarks /// _Analogues_ /// - clSetUserEventStatus /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hEvent` /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT ZE_APIEXPORT ze_result_t ZE_APICALL zeEventHostSignal( ze_event_handle_t hEvent ///< [in] handle of the event ); /////////////////////////////////////////////////////////////////////////////// /// @brief The current host thread waits on an event to be signaled. /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @remarks /// _Analogues_ /// - clWaitForEvents /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hEvent` /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT /// - ::ZE_RESULT_NOT_READY /// + timeout expired ZE_APIEXPORT ze_result_t ZE_APICALL zeEventHostSynchronize( ze_event_handle_t hEvent, ///< [in] handle of the event uint64_t timeout ///< [in] if non-zero, then indicates the maximum time (in nanoseconds) to ///< yield before returning ::ZE_RESULT_SUCCESS or ::ZE_RESULT_NOT_READY; ///< if zero, then operates exactly like ::zeEventQueryStatus; ///< if `UINT64_MAX`, then function will not return until complete or ///< device is lost. ///< Due to external dependencies, timeout may be rounded to the closest ///< value allowed by the accuracy of those dependencies. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Queries an event object's status on the host. 
/// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @remarks /// _Analogues_ /// - **clGetEventInfo** /// - vkGetEventStatus /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hEvent` /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT /// - ::ZE_RESULT_NOT_READY /// + not signaled ZE_APIEXPORT ze_result_t ZE_APICALL zeEventQueryStatus( ze_event_handle_t hEvent ///< [in] handle of the event ); /////////////////////////////////////////////////////////////////////////////// /// @brief Appends a reset of an event back to not signaled state into a command /// list. /// /// @details /// - The application must ensure the events are accessible by the device on /// which the command list was created. /// - The application must ensure the command list and events were created /// on the same context. /// - The application must **not** call this function from simultaneous /// threads with the same command list handle. /// - The implementation of this function should be lock-free. 
/// /// @remarks /// _Analogues_ /// - vkResetEvent /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// + `nullptr == hEvent` /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendEventReset( ze_command_list_handle_t hCommandList, ///< [in] handle of the command list ze_event_handle_t hEvent ///< [in] handle of the event ); /////////////////////////////////////////////////////////////////////////////// /// @brief The current host thread resets an event back to not signaled state. /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @remarks /// _Analogues_ /// - vkResetEvent /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hEvent` /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT ZE_APIEXPORT ze_result_t ZE_APICALL zeEventHostReset( ze_event_handle_t hEvent ///< [in] handle of the event ); /////////////////////////////////////////////////////////////////////////////// /// @brief Kernel timestamp clock data /// /// @details /// - The timestamp frequency can be queried from the `timerResolution` /// member of ::ze_device_properties_t. /// - The number of valid bits in the timestamp value can be queried from /// the `kernelTimestampValidBits` member of ::ze_device_properties_t. 
typedef struct _ze_kernel_timestamp_data_t { uint64_t kernelStart; ///< [out] device clock at start of kernel execution uint64_t kernelEnd; ///< [out] device clock at end of kernel execution } ze_kernel_timestamp_data_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Kernel timestamp result typedef struct _ze_kernel_timestamp_result_t { ze_kernel_timestamp_data_t global; ///< [out] wall-clock data ze_kernel_timestamp_data_t context; ///< [out] context-active data; only includes clocks while device context ///< was actively executing. } ze_kernel_timestamp_result_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Queries an event's timestamp value on the host. /// /// @details /// - The application must ensure the event was created from an event pool /// that was created using ::ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP or /// ::ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP flag. /// - The destination memory will be unmodified if the event has not been /// signaled. /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hEvent` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == dstptr` /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT /// - ::ZE_RESULT_NOT_READY /// + not signaled ZE_APIEXPORT ze_result_t ZE_APICALL zeEventQueryKernelTimestamp( ze_event_handle_t hEvent, ///< [in] handle of the event ze_kernel_timestamp_result_t* dstptr ///< [in,out] pointer to memory for where timestamp result will be written. 
); /////////////////////////////////////////////////////////////////////////////// /// @brief Appends a query of an events' timestamp value(s) into a command list. /// /// @details /// - The application must ensure the events are accessible by the device on /// which the command list was created. /// - The application must ensure the events were created from an event pool /// that was created using ::ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP flag. /// - The application must ensure the memory pointed to by both dstptr and /// pOffsets is accessible by the device on which the command list was /// created. /// - The value(s) written to the destination buffer are undefined if any /// timestamp event has not been signaled. /// - If pOffsets is nullptr, then multiple results will be appended /// sequentially into memory in the same order as phEvents. /// - The application must ensure the command list and events were created, /// and the memory was allocated, on the same context. /// - The application must **not** call this function from simultaneous /// threads with the same command list handle. /// - The implementation of this function should be lock-free. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == phEvents` /// + `nullptr == dstptr` /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT /// - ::ZE_RESULT_ERROR_INVALID_SIZE /// + `(nullptr == phWaitEvents) && (0 < numWaitEvents)` ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendQueryKernelTimestamps( ze_command_list_handle_t hCommandList, ///< [in] handle of the command list uint32_t numEvents, ///< [in] the number of timestamp events to query ze_event_handle_t* phEvents, ///< [in][range(0, numEvents)] handles of timestamp events to query void* dstptr, ///< [in,out] pointer to memory where ::ze_kernel_timestamp_result_t will ///< be written; must be size-aligned. const size_t* pOffsets, ///< [in][optional][range(0, numEvents)] offset, in bytes, to write ///< results; address must be 4byte-aligned and offsets must be ///< size-aligned. ze_event_handle_t hSignalEvent, ///< [in][optional] handle of the event to signal on completion uint32_t numWaitEvents, ///< [in][optional] number of events to wait on before executing query; ///< must be 0 if `nullptr == phWaitEvents` ze_event_handle_t* phWaitEvents ///< [in][optional][range(0, numWaitEvents)] handle of the events to wait ///< on before executing query ); /////////////////////////////////////////////////////////////////////////////// /// @brief Gets the handle of the event pool for the event. /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hEvent` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == phEventPool` ZE_APIEXPORT ze_result_t ZE_APICALL zeEventGetEventPool( ze_event_handle_t hEvent, ///< [in] handle of the event ze_event_pool_handle_t* phEventPool ///< [out] handle of the event pool for the event ); /////////////////////////////////////////////////////////////////////////////// /// @brief Gets the signal event scope. /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hEvent` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pSignalScope` ZE_APIEXPORT ze_result_t ZE_APICALL zeEventGetSignalScope( ze_event_handle_t hEvent, ///< [in] handle of the event ze_event_scope_flags_t* pSignalScope ///< [out] signal event scope. This is the scope of relevant cache ///< hierarchies that are flushed on a signal action before the event is ///< triggered. May be 0 or a valid combination of ::ze_event_scope_flag_t. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Gets the wait event scope. /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hEvent` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pWaitScope` ZE_APIEXPORT ze_result_t ZE_APICALL zeEventGetWaitScope( ze_event_handle_t hEvent, ///< [in] handle of the event ze_event_scope_flags_t* pWaitScope ///< [out] wait event scope. This is the scope of relevant cache ///< hierarchies invalidated on a wait action after the event is complete. ///< May be 0 or a valid combination of ::ze_event_scope_flag_t. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Gets the handle of the context on which the event pool was created. /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hEventPool` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == phContext` ZE_APIEXPORT ze_result_t ZE_APICALL zeEventPoolGetContextHandle( ze_event_pool_handle_t hEventPool, ///< [in] handle of the event pool ze_context_handle_t* phContext ///< [out] handle of the context on which the event pool was created ); /////////////////////////////////////////////////////////////////////////////// /// @brief Gets the creation flags used to create the event pool. /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hEventPool` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pFlags` ZE_APIEXPORT ze_result_t ZE_APICALL zeEventPoolGetFlags( ze_event_pool_handle_t hEventPool, ///< [in] handle of the event pool ze_event_pool_flags_t* pFlags ///< [out] creation flags used to create the event pool; may be 0 or a ///< valid combination of ::ze_event_pool_flag_t ); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero APIs for Fence #if !defined(__GNUC__) #pragma region fence #endif /////////////////////////////////////////////////////////////////////////////// /// @brief Supported fence creation flags typedef uint32_t ze_fence_flags_t; typedef enum _ze_fence_flag_t { ZE_FENCE_FLAG_SIGNALED = ZE_BIT(0), ///< fence is created in the signaled state, otherwise not signaled. ZE_FENCE_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_FENCE_FLAG_* ENUMs } ze_fence_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Fence descriptor typedef struct _ze_fence_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_fence_flags_t flags; ///< [in] creation flags. ///< must be 0 (default) or a valid combination of ::ze_fence_flag_t. } ze_fence_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Creates a fence for the command queue. /// /// @details /// - A fence is a heavyweight synchronization primitive used to communicate /// to the host that command list execution has completed. 
/// - The application must only use the fence for the command queue which /// was provided during creation. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. /// /// @remarks /// _Analogues_ /// - **vkCreateFence** /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandQueue` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == desc` /// + `nullptr == phFence` /// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION /// + `0x1 < desc->flags` ZE_APIEXPORT ze_result_t ZE_APICALL zeFenceCreate( ze_command_queue_handle_t hCommandQueue, ///< [in] handle of command queue const ze_fence_desc_t* desc, ///< [in] pointer to fence descriptor ze_fence_handle_t* phFence ///< [out] pointer to handle of fence object created ); /////////////////////////////////////////////////////////////////////////////// /// @brief Deletes a fence object. /// /// @details /// - The application must ensure the device is not currently referencing /// the fence before it is deleted. /// - The implementation of this function may immediately free all Host and /// Device allocations associated with this fence. /// - The application must **not** call this function from simultaneous /// threads with the same fence handle. /// - The implementation of this function must be thread-safe. 
/// /// @remarks /// _Analogues_ /// - **vkDestroyFence** /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hFence` /// - ::ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE ZE_APIEXPORT ze_result_t ZE_APICALL zeFenceDestroy( ze_fence_handle_t hFence ///< [in][release] handle of fence object to destroy ); /////////////////////////////////////////////////////////////////////////////// /// @brief The current host thread waits on a fence to be signaled. /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @remarks /// _Analogues_ /// - **vkWaitForFences** /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hFence` /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT /// - ::ZE_RESULT_NOT_READY /// + timeout expired ZE_APIEXPORT ze_result_t ZE_APICALL zeFenceHostSynchronize( ze_fence_handle_t hFence, ///< [in] handle of the fence uint64_t timeout ///< [in] if non-zero, then indicates the maximum time (in nanoseconds) to ///< yield before returning ::ZE_RESULT_SUCCESS or ::ZE_RESULT_NOT_READY; ///< if zero, then operates exactly like ::zeFenceQueryStatus; ///< if `UINT64_MAX`, then function will not return until complete or ///< device is lost. ///< Due to external dependencies, timeout may be rounded to the closest ///< value allowed by the accuracy of those dependencies. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Queries a fence object's status. 
/// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @remarks /// _Analogues_ /// - **vkGetFenceStatus** /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hFence` /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT /// - ::ZE_RESULT_NOT_READY /// + not signaled ZE_APIEXPORT ze_result_t ZE_APICALL zeFenceQueryStatus( ze_fence_handle_t hFence ///< [in] handle of the fence ); /////////////////////////////////////////////////////////////////////////////// /// @brief Reset a fence back to the not signaled state. /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @remarks /// _Analogues_ /// - **vkResetFences** /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hFence` ZE_APIEXPORT ze_result_t ZE_APICALL zeFenceReset( ze_fence_handle_t hFence ///< [in] handle of the fence ); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero APIs for Images #if !defined(__GNUC__) #pragma region image #endif /////////////////////////////////////////////////////////////////////////////// /// @brief Supported image creation flags typedef uint32_t ze_image_flags_t; typedef enum _ze_image_flag_t { ZE_IMAGE_FLAG_KERNEL_WRITE = ZE_BIT(0), ///< kernels will write contents ZE_IMAGE_FLAG_BIAS_UNCACHED = ZE_BIT(1), ///< device should not cache contents ZE_IMAGE_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_IMAGE_FLAG_* 
ENUMs } ze_image_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Supported image types typedef enum _ze_image_type_t { ZE_IMAGE_TYPE_1D = 0, ///< 1D ZE_IMAGE_TYPE_1DARRAY = 1, ///< 1D array ZE_IMAGE_TYPE_2D = 2, ///< 2D ZE_IMAGE_TYPE_2DARRAY = 3, ///< 2D array ZE_IMAGE_TYPE_3D = 4, ///< 3D ZE_IMAGE_TYPE_BUFFER = 5, ///< Buffer ZE_IMAGE_TYPE_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_IMAGE_TYPE_* ENUMs } ze_image_type_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Supported image format layouts typedef enum _ze_image_format_layout_t { ZE_IMAGE_FORMAT_LAYOUT_8 = 0, ///< 8-bit single component layout ZE_IMAGE_FORMAT_LAYOUT_16 = 1, ///< 16-bit single component layout ZE_IMAGE_FORMAT_LAYOUT_32 = 2, ///< 32-bit single component layout ZE_IMAGE_FORMAT_LAYOUT_8_8 = 3, ///< 2-component 8-bit layout ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8 = 4, ///< 4-component 8-bit layout ZE_IMAGE_FORMAT_LAYOUT_16_16 = 5, ///< 2-component 16-bit layout ZE_IMAGE_FORMAT_LAYOUT_16_16_16_16 = 6, ///< 4-component 16-bit layout ZE_IMAGE_FORMAT_LAYOUT_32_32 = 7, ///< 2-component 32-bit layout ZE_IMAGE_FORMAT_LAYOUT_32_32_32_32 = 8, ///< 4-component 32-bit layout ZE_IMAGE_FORMAT_LAYOUT_10_10_10_2 = 9, ///< 4-component 10_10_10_2 layout ZE_IMAGE_FORMAT_LAYOUT_11_11_10 = 10, ///< 3-component 11_11_10 layout ZE_IMAGE_FORMAT_LAYOUT_5_6_5 = 11, ///< 3-component 5_6_5 layout ZE_IMAGE_FORMAT_LAYOUT_5_5_5_1 = 12, ///< 4-component 5_5_5_1 layout ZE_IMAGE_FORMAT_LAYOUT_4_4_4_4 = 13, ///< 4-component 4_4_4_4 layout ZE_IMAGE_FORMAT_LAYOUT_Y8 = 14, ///< Media Format: Y8. Format type and swizzle is ignored for this. ZE_IMAGE_FORMAT_LAYOUT_NV12 = 15, ///< Media Format: NV12. Format type and swizzle is ignored for this. ZE_IMAGE_FORMAT_LAYOUT_YUYV = 16, ///< Media Format: YUYV. Format type and swizzle is ignored for this. ZE_IMAGE_FORMAT_LAYOUT_VYUY = 17, ///< Media Format: VYUY. 
Format type and swizzle is ignored for this. ZE_IMAGE_FORMAT_LAYOUT_YVYU = 18, ///< Media Format: YVYU. Format type and swizzle is ignored for this. ZE_IMAGE_FORMAT_LAYOUT_UYVY = 19, ///< Media Format: UYVY. Format type and swizzle is ignored for this. ZE_IMAGE_FORMAT_LAYOUT_AYUV = 20, ///< Media Format: AYUV. Format type and swizzle is ignored for this. ZE_IMAGE_FORMAT_LAYOUT_P010 = 21, ///< Media Format: P010. Format type and swizzle is ignored for this. ZE_IMAGE_FORMAT_LAYOUT_Y410 = 22, ///< Media Format: Y410. Format type and swizzle is ignored for this. ZE_IMAGE_FORMAT_LAYOUT_P012 = 23, ///< Media Format: P012. Format type and swizzle is ignored for this. ZE_IMAGE_FORMAT_LAYOUT_Y16 = 24, ///< Media Format: Y16. Format type and swizzle is ignored for this. ZE_IMAGE_FORMAT_LAYOUT_P016 = 25, ///< Media Format: P016. Format type and swizzle is ignored for this. ZE_IMAGE_FORMAT_LAYOUT_Y216 = 26, ///< Media Format: Y216. Format type and swizzle is ignored for this. ZE_IMAGE_FORMAT_LAYOUT_P216 = 27, ///< Media Format: P216. Format type and swizzle is ignored for this. ZE_IMAGE_FORMAT_LAYOUT_P8 = 28, ///< Media Format: P8. Format type and swizzle is ignored for this. ZE_IMAGE_FORMAT_LAYOUT_YUY2 = 29, ///< Media Format: YUY2. Format type and swizzle is ignored for this. ZE_IMAGE_FORMAT_LAYOUT_A8P8 = 30, ///< Media Format: A8P8. Format type and swizzle is ignored for this. ZE_IMAGE_FORMAT_LAYOUT_IA44 = 31, ///< Media Format: IA44. Format type and swizzle is ignored for this. ZE_IMAGE_FORMAT_LAYOUT_AI44 = 32, ///< Media Format: AI44. Format type and swizzle is ignored for this. ZE_IMAGE_FORMAT_LAYOUT_Y416 = 33, ///< Media Format: Y416. Format type and swizzle is ignored for this. ZE_IMAGE_FORMAT_LAYOUT_Y210 = 34, ///< Media Format: Y210. Format type and swizzle is ignored for this. ZE_IMAGE_FORMAT_LAYOUT_I420 = 35, ///< Media Format: I420. Format type and swizzle is ignored for this. ZE_IMAGE_FORMAT_LAYOUT_YV12 = 36, ///< Media Format: YV12. 
Format type and swizzle is ignored for this. ZE_IMAGE_FORMAT_LAYOUT_400P = 37, ///< Media Format: 400P. Format type and swizzle is ignored for this. ZE_IMAGE_FORMAT_LAYOUT_422H = 38, ///< Media Format: 422H. Format type and swizzle is ignored for this. ZE_IMAGE_FORMAT_LAYOUT_422V = 39, ///< Media Format: 422V. Format type and swizzle is ignored for this. ZE_IMAGE_FORMAT_LAYOUT_444P = 40, ///< Media Format: 444P. Format type and swizzle is ignored for this. ZE_IMAGE_FORMAT_LAYOUT_RGBP = 41, ///< Media Format: RGBP. Format type and swizzle is ignored for this. ZE_IMAGE_FORMAT_LAYOUT_BRGP = 42, ///< Media Format: BRGP. Format type and swizzle is ignored for this. ZE_IMAGE_FORMAT_LAYOUT_8_8_8 = 43, ///< 3-component 8-bit layout ZE_IMAGE_FORMAT_LAYOUT_16_16_16 = 44, ///< 3-component 16-bit layout ZE_IMAGE_FORMAT_LAYOUT_32_32_32 = 45, ///< 3-component 32-bit layout ZE_IMAGE_FORMAT_LAYOUT_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_IMAGE_FORMAT_LAYOUT_* ENUMs } ze_image_format_layout_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Supported image format types typedef enum _ze_image_format_type_t { ZE_IMAGE_FORMAT_TYPE_UINT = 0, ///< Unsigned integer ZE_IMAGE_FORMAT_TYPE_SINT = 1, ///< Signed integer ZE_IMAGE_FORMAT_TYPE_UNORM = 2, ///< Unsigned normalized integer ZE_IMAGE_FORMAT_TYPE_SNORM = 3, ///< Signed normalized integer ZE_IMAGE_FORMAT_TYPE_FLOAT = 4, ///< Float ZE_IMAGE_FORMAT_TYPE_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_IMAGE_FORMAT_TYPE_* ENUMs } ze_image_format_type_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Supported image format component swizzle into channel typedef enum _ze_image_format_swizzle_t { ZE_IMAGE_FORMAT_SWIZZLE_R = 0, ///< Red component ZE_IMAGE_FORMAT_SWIZZLE_G = 1, ///< Green component ZE_IMAGE_FORMAT_SWIZZLE_B = 2, ///< Blue component ZE_IMAGE_FORMAT_SWIZZLE_A = 3, ///< Alpha component ZE_IMAGE_FORMAT_SWIZZLE_0 = 4, ///< 
Zero ZE_IMAGE_FORMAT_SWIZZLE_1 = 5, ///< One ZE_IMAGE_FORMAT_SWIZZLE_X = 6, ///< Don't care ZE_IMAGE_FORMAT_SWIZZLE_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_IMAGE_FORMAT_SWIZZLE_* ENUMs } ze_image_format_swizzle_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Image format typedef struct _ze_image_format_t { ze_image_format_layout_t layout; ///< [in] image format component layout (e.g. N-component layouts and media ///< formats) ze_image_format_type_t type; ///< [in] image format type ze_image_format_swizzle_t x; ///< [in] image component swizzle into channel x ze_image_format_swizzle_t y; ///< [in] image component swizzle into channel y ze_image_format_swizzle_t z; ///< [in] image component swizzle into channel z ze_image_format_swizzle_t w; ///< [in] image component swizzle into channel w } ze_image_format_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Image descriptor typedef struct _ze_image_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_image_flags_t flags; ///< [in] creation flags. ///< must be 0 (default) or a valid combination of ::ze_image_flag_t; ///< default is read-only, cached access. ze_image_type_t type; ///< [in] image type. Media format layouts are unsupported for ///< ::ZE_IMAGE_TYPE_BUFFER ze_image_format_t format; ///< [in] image format uint64_t width; ///< [in] width dimension. ///< ::ZE_IMAGE_TYPE_BUFFER: size in bytes; see the `maxImageBufferSize` ///< member of ::ze_device_image_properties_t for limits. ///< ::ZE_IMAGE_TYPE_1D, ::ZE_IMAGE_TYPE_1DARRAY: width in pixels; see the ///< `maxImageDims1D` member of ::ze_device_image_properties_t for limits. 
///< ::ZE_IMAGE_TYPE_2D, ::ZE_IMAGE_TYPE_2DARRAY: width in pixels; see the ///< `maxImageDims2D` member of ::ze_device_image_properties_t for limits. ///< ::ZE_IMAGE_TYPE_3D: width in pixels; see the `maxImageDims3D` member ///< of ::ze_device_image_properties_t for limits. uint32_t height; ///< [in] height dimension. ///< ::ZE_IMAGE_TYPE_2D, ::ZE_IMAGE_TYPE_2DARRAY: height in pixels; see the ///< `maxImageDims2D` member of ::ze_device_image_properties_t for limits. ///< ::ZE_IMAGE_TYPE_3D: height in pixels; see the `maxImageDims3D` member ///< of ::ze_device_image_properties_t for limits. ///< other: ignored. uint32_t depth; ///< [in] depth dimension. ///< ::ZE_IMAGE_TYPE_3D: depth in pixels; see the `maxImageDims3D` member ///< of ::ze_device_image_properties_t for limits. ///< other: ignored. uint32_t arraylevels; ///< [in] array levels. ///< ::ZE_IMAGE_TYPE_1DARRAY, ::ZE_IMAGE_TYPE_2DARRAY: see the ///< `maxImageArraySlices` member of ::ze_device_image_properties_t for limits. ///< other: ignored. uint32_t miplevels; ///< [in] mipmap levels (must be 0) } ze_image_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Supported sampler filtering flags typedef uint32_t ze_image_sampler_filter_flags_t; typedef enum _ze_image_sampler_filter_flag_t { ZE_IMAGE_SAMPLER_FILTER_FLAG_POINT = ZE_BIT(0), ///< device supports point filtering ZE_IMAGE_SAMPLER_FILTER_FLAG_LINEAR = ZE_BIT(1), ///< device supports linear filtering ZE_IMAGE_SAMPLER_FILTER_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_IMAGE_SAMPLER_FILTER_FLAG_* ENUMs } ze_image_sampler_filter_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Image properties typedef struct _ze_image_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). 
ze_image_sampler_filter_flags_t samplerFilterFlags; ///< [out] supported sampler filtering. ///< returns 0 (unsupported) or a combination of ::ze_image_sampler_filter_flag_t. } ze_image_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves supported properties of an image. /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == desc` /// + `nullptr == pImageProperties` /// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION /// + `0x3 < desc->flags` /// + `::ZE_IMAGE_TYPE_BUFFER < desc->type` ZE_APIEXPORT ze_result_t ZE_APICALL zeImageGetProperties( ze_device_handle_t hDevice, ///< [in] handle of the device const ze_image_desc_t* desc, ///< [in] pointer to image descriptor ze_image_properties_t* pImageProperties ///< [out] pointer to image properties ); /////////////////////////////////////////////////////////////////////////////// /// @brief Creates an image on the context. /// /// @details /// - The application must only use the image for the device, or its /// sub-devices, which was provided during creation. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. 
/// /// @remarks /// _Analogues_ /// - clCreateImage /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == desc` /// + `nullptr == phImage` /// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION /// + `0x3 < desc->flags` /// + `::ZE_IMAGE_TYPE_BUFFER < desc->type` /// - ::ZE_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT ZE_APIEXPORT ze_result_t ZE_APICALL zeImageCreate( ze_context_handle_t hContext, ///< [in] handle of the context object ze_device_handle_t hDevice, ///< [in] handle of the device const ze_image_desc_t* desc, ///< [in] pointer to image descriptor ze_image_handle_t* phImage ///< [out] pointer to handle of image object created ); /////////////////////////////////////////////////////////////////////////////// /// @brief Deletes an image object. /// /// @details /// - The application must ensure the device is not currently referencing /// the image before it is deleted. /// - The implementation of this function may immediately free all Host and /// Device allocations associated with this image. /// - The application must **not** call this function from simultaneous /// threads with the same image handle. /// - The implementation of this function must be thread-safe. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hImage` /// - ::ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE ZE_APIEXPORT ze_result_t ZE_APICALL zeImageDestroy( ze_image_handle_t hImage ///< [in][release] handle of image object to destroy ); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero APIs for Memory #if !defined(__GNUC__) #pragma region memory #endif /////////////////////////////////////////////////////////////////////////////// /// @brief Supported memory allocation flags typedef uint32_t ze_device_mem_alloc_flags_t; typedef enum _ze_device_mem_alloc_flag_t { ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_CACHED = ZE_BIT(0), ///< device should cache allocation ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED = ZE_BIT(1), ///< device should not cache allocation (UC) ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_INITIAL_PLACEMENT = ZE_BIT(2), ///< optimize shared allocation for first access on the device ZE_DEVICE_MEM_ALLOC_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_DEVICE_MEM_ALLOC_FLAG_* ENUMs } ze_device_mem_alloc_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Device memory allocation descriptor typedef struct _ze_device_mem_alloc_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_device_mem_alloc_flags_t flags; ///< [in] flags specifying additional allocation controls. ///< must be 0 (default) or a valid combination of ::ze_device_mem_alloc_flag_t; ///< default behavior may use implicit driver-based heuristics. uint32_t ordinal; ///< [in] ordinal of the device's local memory to allocate from. 
///< must be less than the count returned from ::zeDeviceGetMemoryProperties. } ze_device_mem_alloc_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Supported host memory allocation flags typedef uint32_t ze_host_mem_alloc_flags_t; typedef enum _ze_host_mem_alloc_flag_t { ZE_HOST_MEM_ALLOC_FLAG_BIAS_CACHED = ZE_BIT(0), ///< host should cache allocation ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED = ZE_BIT(1), ///< host should not cache allocation (UC) ZE_HOST_MEM_ALLOC_FLAG_BIAS_WRITE_COMBINED = ZE_BIT(2), ///< host memory should be allocated write-combined (WC) ZE_HOST_MEM_ALLOC_FLAG_BIAS_INITIAL_PLACEMENT = ZE_BIT(3), ///< optimize shared allocation for first access on the host ZE_HOST_MEM_ALLOC_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_HOST_MEM_ALLOC_FLAG_* ENUMs } ze_host_mem_alloc_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Host memory allocation descriptor typedef struct _ze_host_mem_alloc_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_host_mem_alloc_flags_t flags; ///< [in] flags specifying additional allocation controls. ///< must be 0 (default) or a valid combination of ::ze_host_mem_alloc_flag_t; ///< default behavior may use implicit driver-based heuristics. } ze_host_mem_alloc_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Allocates shared memory on the context. /// /// @details /// - Shared allocations share ownership between the host and one or more /// devices. /// - Shared allocations may optionally be associated with a device by /// passing a handle to the device. /// - Devices supporting only single-device shared access capabilities may /// access shared memory associated with the device. 
/// For these devices, ownership of the allocation is shared between the /// host and the associated device only. /// - Passing nullptr as the device handle does not associate the shared /// allocation with any device. /// For allocations with no associated device, ownership of the allocation /// is shared between the host and all devices supporting cross-device /// shared access capabilities. /// - The application must only use the memory allocation for the context /// and device, or its sub-devices, which was provided during allocation. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == device_desc` /// + `nullptr == host_desc` /// + `nullptr == pptr` /// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION /// + `0x7 < device_desc->flags` /// + `0xf < host_desc->flags` /// - ::ZE_RESULT_ERROR_UNSUPPORTED_SIZE /// + `0 == size` /// - ::ZE_RESULT_ERROR_UNSUPPORTED_ALIGNMENT /// + Must be zero or a power-of-two /// + `0 != (alignment & (alignment - 1))` ZE_APIEXPORT ze_result_t ZE_APICALL zeMemAllocShared( ze_context_handle_t hContext, ///< [in] handle of the context object const ze_device_mem_alloc_desc_t* device_desc, ///< [in] pointer to device memory allocation descriptor const ze_host_mem_alloc_desc_t* host_desc, ///< [in] pointer to host memory allocation descriptor size_t size, ///< [in] size in bytes to allocate; must be less than or equal to the ///< `maxMemAllocSize` member of ::ze_device_properties_t size_t alignment, ///< [in] minimum alignment in bytes for the allocation; must be a power of ///< two ze_device_handle_t hDevice, ///< [in][optional] 
device handle to associate with void** pptr ///< [out] pointer to shared allocation ); /////////////////////////////////////////////////////////////////////////////// /// @brief Allocates device memory on the context. /// /// @details /// - Device allocations are owned by a specific device. /// - In general, a device allocation may only be accessed by the device /// that owns it. /// - The application must only use the memory allocation for the context /// and device, or its sub-devices, which was provided during allocation. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == device_desc` /// + `nullptr == pptr` /// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION /// + `0x7 < device_desc->flags` /// - ::ZE_RESULT_ERROR_UNSUPPORTED_SIZE /// + `0 == size` /// - ::ZE_RESULT_ERROR_UNSUPPORTED_ALIGNMENT /// + Must be zero or a power-of-two /// + `0 != (alignment & (alignment - 1))` ZE_APIEXPORT ze_result_t ZE_APICALL zeMemAllocDevice( ze_context_handle_t hContext, ///< [in] handle of the context object const ze_device_mem_alloc_desc_t* device_desc, ///< [in] pointer to device memory allocation descriptor size_t size, ///< [in] size in bytes to allocate; must be less than or equal to the ///< `maxMemAllocSize` member of ::ze_device_properties_t size_t alignment, ///< [in] minimum alignment in bytes for the allocation; must be a power of ///< two ze_device_handle_t hDevice, ///< [in] handle of the device void** pptr ///< [out] pointer to device allocation ); /////////////////////////////////////////////////////////////////////////////// 
/// @brief Allocates host memory on the context. /// /// @details /// - Host allocations are owned by the host process. /// - Host allocations are accessible by the host and all devices within the /// driver's context. /// - Host allocations are frequently used as staging areas to transfer data /// to or from devices. /// - The application must only use the memory allocation for the context /// which was provided during allocation. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == host_desc` /// + `nullptr == pptr` /// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION /// + `0xf < host_desc->flags` /// - ::ZE_RESULT_ERROR_UNSUPPORTED_SIZE /// + `0 == size` /// - ::ZE_RESULT_ERROR_UNSUPPORTED_ALIGNMENT /// + Must be zero or a power-of-two /// + `0 != (alignment & (alignment - 1))` ZE_APIEXPORT ze_result_t ZE_APICALL zeMemAllocHost( ze_context_handle_t hContext, ///< [in] handle of the context object const ze_host_mem_alloc_desc_t* host_desc, ///< [in] pointer to host memory allocation descriptor size_t size, ///< [in] size in bytes to allocate; must be less than or equal to the ///< `maxMemAllocSize` member of ::ze_device_properties_t size_t alignment, ///< [in] minimum alignment in bytes for the allocation; must be a power of ///< two void** pptr ///< [out] pointer to host allocation ); /////////////////////////////////////////////////////////////////////////////// /// @brief Frees allocated host memory, device memory, or shared memory on the /// context. 
/// /// @details /// - The application must ensure the device is not currently referencing /// the memory before it is freed /// - The implementation will use the default and immediate policy to /// schedule all Host and Device allocations associated with this memory /// to be freed, without any safety checking. Actual freeing of memory is /// specific to user mode driver and kernel mode driver implementation and /// may be done asynchronously. /// - The application must **not** call this function from simultaneous /// threads with the same pointer. /// - The implementation of this function must be thread-safe. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == ptr` ZE_APIEXPORT ze_result_t ZE_APICALL zeMemFree( ze_context_handle_t hContext, ///< [in] handle of the context object void* ptr ///< [in][release] pointer to memory to free ); /////////////////////////////////////////////////////////////////////////////// /// @brief Memory allocation type typedef enum _ze_memory_type_t { ZE_MEMORY_TYPE_UNKNOWN = 0, ///< the memory pointed to is of unknown type ZE_MEMORY_TYPE_HOST = 1, ///< the memory pointed to is a host allocation ZE_MEMORY_TYPE_DEVICE = 2, ///< the memory pointed to is a device allocation ZE_MEMORY_TYPE_SHARED = 3, ///< the memory pointed to is a shared ownership allocation ZE_MEMORY_TYPE_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_MEMORY_TYPE_* ENUMs } ze_memory_type_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Memory allocation properties queried using ::zeMemGetAllocProperties typedef struct _ze_memory_allocation_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< 
[in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_memory_type_t type; ///< [out] type of allocated memory uint64_t id; ///< [out] identifier for this allocation uint64_t pageSize; ///< [out] page size used for allocation } ze_memory_allocation_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves attributes of a memory allocation /// /// @details /// - The application may call this function from simultaneous threads. /// - The application may query attributes of a memory allocation unrelated /// to the context. /// When this occurs, the returned allocation type will be /// ::ZE_MEMORY_TYPE_UNKNOWN, and the returned identifier and associated /// device is unspecified. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == ptr` /// + `nullptr == pMemAllocProperties` ZE_APIEXPORT ze_result_t ZE_APICALL zeMemGetAllocProperties( ze_context_handle_t hContext, ///< [in] handle of the context object const void* ptr, ///< [in] memory pointer to query ze_memory_allocation_properties_t* pMemAllocProperties, ///< [in,out] query result for memory allocation properties ze_device_handle_t* phDevice ///< [out][optional] device associated with this allocation ); /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves the base address and/or size of an allocation /// /// @details /// - The application may call this function from simultaneous threads. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == ptr` ZE_APIEXPORT ze_result_t ZE_APICALL zeMemGetAddressRange( ze_context_handle_t hContext, ///< [in] handle of the context object const void* ptr, ///< [in] memory pointer to query void** pBase, ///< [in,out][optional] base address of the allocation size_t* pSize ///< [in,out][optional] size of the allocation ); /////////////////////////////////////////////////////////////////////////////// /// @brief Creates an IPC memory handle for the specified allocation /// /// @details /// - Takes a pointer to a device memory allocation and creates an IPC /// memory handle for exporting it for use in another process. /// - The pointer must be base pointer of a device or host memory /// allocation; i.e. the value returned from ::zeMemAllocDevice or from /// ::zeMemAllocHost, respectively. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == ptr` /// + `nullptr == pIpcHandle` ZE_APIEXPORT ze_result_t ZE_APICALL zeMemGetIpcHandle( ze_context_handle_t hContext, ///< [in] handle of the context object const void* ptr, ///< [in] pointer to the device memory allocation ze_ipc_mem_handle_t* pIpcHandle ///< [out] Returned IPC memory handle ); /////////////////////////////////////////////////////////////////////////////// /// @brief Creates an IPC memory handle out of a file descriptor /// /// @details /// - Handle passed must be a valid file descriptor obtained with /// ::ze_external_memory_export_fd_t via ::zeMemGetAllocProperties. /// - Returned IPC handle may contain metadata in addition to the file /// descriptor. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pIpcHandle` ZE_APIEXPORT ze_result_t ZE_APICALL zeMemGetIpcHandleFromFileDescriptorExp( ze_context_handle_t hContext, ///< [in] handle of the context object uint64_t handle, ///< [in] file descriptor ze_ipc_mem_handle_t* pIpcHandle ///< [out] Returned IPC memory handle ); /////////////////////////////////////////////////////////////////////////////// /// @brief Gets the file descriptor contained in an IPC memory handle /// /// @details /// - IPC memory handle must be a valid handle obtained with /// ::zeMemGetIpcHandle. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pHandle` ZE_APIEXPORT ze_result_t ZE_APICALL zeMemGetFileDescriptorFromIpcHandleExp( ze_context_handle_t hContext, ///< [in] handle of the context object ze_ipc_mem_handle_t ipcHandle, ///< [in] IPC memory handle uint64_t* pHandle ///< [out] Returned file descriptor ); /////////////////////////////////////////////////////////////////////////////// /// @brief Returns an IPC memory handle to the driver /// /// @details /// - This call may be used for IPC handles previously obtained with either /// ::zeMemGetIpcHandle or with ::ze_external_memory_export_fd_t via ::zeMemGetAllocProperties. 
/// - Upon call, driver may release any underlying resources associated with /// the IPC handle. /// For instance, it may close the file descriptor contained in the IPC /// handle, if such type of handle is being used by the driver. /// - This call does not free the original allocation for which the IPC /// handle was created. /// - This function may **not** be called from simultaneous threads with the /// same IPC handle. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` ZE_APIEXPORT ze_result_t ZE_APICALL zeMemPutIpcHandle( ze_context_handle_t hContext, ///< [in] handle of the context object ze_ipc_mem_handle_t handle ///< [in] IPC memory handle ); /////////////////////////////////////////////////////////////////////////////// /// @brief Supported IPC memory flags typedef uint32_t ze_ipc_memory_flags_t; typedef enum _ze_ipc_memory_flag_t { ZE_IPC_MEMORY_FLAG_BIAS_CACHED = ZE_BIT(0), ///< device should cache allocation ZE_IPC_MEMORY_FLAG_BIAS_UNCACHED = ZE_BIT(1), ///< device should not cache allocation (UC) ZE_IPC_MEMORY_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_IPC_MEMORY_FLAG_* ENUMs } ze_ipc_memory_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Opens an IPC memory handle to retrieve a device pointer on the /// context. /// /// @details /// - Takes an IPC memory handle from a remote process and associates it /// with a device pointer usable in this process. /// - The device pointer in this process should not be freed with /// ::zeMemFree, but rather with ::zeMemCloseIpcHandle. /// - Multiple calls to this function with the same IPC handle will return /// unique pointers. 
/// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION /// + `0x3 < flags` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pptr` ZE_APIEXPORT ze_result_t ZE_APICALL zeMemOpenIpcHandle( ze_context_handle_t hContext, ///< [in] handle of the context object ze_device_handle_t hDevice, ///< [in] handle of the device to associate with the IPC memory handle ze_ipc_mem_handle_t handle, ///< [in] IPC memory handle ze_ipc_memory_flags_t flags, ///< [in] flags controlling the operation. ///< must be 0 (default) or a valid combination of ::ze_ipc_memory_flag_t. void** pptr ///< [out] pointer to device allocation in this process ); /////////////////////////////////////////////////////////////////////////////// /// @brief Closes an IPC memory handle /// /// @details /// - Closes an IPC memory handle by unmapping memory that was opened in /// this process using ::zeMemOpenIpcHandle. /// - The application must **not** call this function from simultaneous /// threads with the same pointer. /// - The implementation of this function must be thread-safe. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == ptr` ZE_APIEXPORT ze_result_t ZE_APICALL zeMemCloseIpcHandle( ze_context_handle_t hContext, ///< [in] handle of the context object const void* ptr ///< [in][release] pointer to device allocation in this process ); /////////////////////////////////////////////////////////////////////////////// /// @brief Additional allocation descriptor for exporting external memory /// /// @details /// - This structure may be passed to ::zeMemAllocDevice and /// ::zeMemAllocHost, via the `pNext` member of /// ::ze_device_mem_alloc_desc_t or ::ze_host_mem_alloc_desc_t, /// respectively, to indicate an exportable memory allocation. /// - This structure may be passed to ::zeImageCreate, via the `pNext` /// member of ::ze_image_desc_t, to indicate an exportable image. typedef struct _ze_external_memory_export_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_external_memory_type_flags_t flags; ///< [in] flags specifying memory export types for this allocation. 
///< must be 0 (default) or a valid combination of ::ze_external_memory_type_flags_t } ze_external_memory_export_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Additional allocation descriptor for importing external memory as a /// file descriptor /// /// @details /// - This structure may be passed to ::zeMemAllocDevice or /// ::zeMemAllocHost, via the `pNext` member of /// ::ze_device_mem_alloc_desc_t or of ::ze_host_mem_alloc_desc_t, /// respectively, to import memory from a file descriptor. /// - This structure may be passed to ::zeImageCreate, via the `pNext` /// member of ::ze_image_desc_t, to import memory from a file descriptor. typedef struct _ze_external_memory_import_fd_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_external_memory_type_flags_t flags; ///< [in] flags specifying the memory import type for the file descriptor. ///< must be 0 (default) or a valid combination of ::ze_external_memory_type_flags_t int fd; ///< [in] the file descriptor handle to import } ze_external_memory_import_fd_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Exports an allocation as a file descriptor /// /// @details /// - This structure may be passed to ::zeMemGetAllocProperties, via the /// `pNext` member of ::ze_memory_allocation_properties_t, to export a /// memory allocation as a file descriptor. /// - This structure may be passed to ::zeImageGetAllocPropertiesExt, via /// the `pNext` member of ::ze_image_allocation_ext_properties_t, to /// export an image as a file descriptor. /// - The requested memory export type must have been specified when the /// allocation was made. 
typedef struct _ze_external_memory_export_fd_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_external_memory_type_flags_t flags; ///< [in] flags specifying the memory export type for the file descriptor. ///< must be 0 (default) or a valid combination of ::ze_external_memory_type_flags_t int fd; ///< [out] the exported file descriptor handle representing the allocation. } ze_external_memory_export_fd_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Additional allocation descriptor for importing external memory as a /// Win32 handle /// /// @details /// - When `handle` is `nullptr`, `name` must not be `nullptr`. /// - When `name` is `nullptr`, `handle` must not be `nullptr`. /// - When `flags` is ::ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32_KMT, /// `name` must be `nullptr`. /// - This structure may be passed to ::zeMemAllocDevice or /// ::zeMemAllocHost, via the `pNext` member of /// ::ze_device_mem_alloc_desc_t or of ::ze_host_mem_alloc_desc_t, /// respectively, to import memory from a Win32 handle. /// - This structure may be passed to ::zeImageCreate, via the `pNext` /// member of ::ze_image_desc_t, to import memory from a Win32 handle. typedef struct _ze_external_memory_import_win32_handle_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_external_memory_type_flags_t flags; ///< [in] flags specifying the memory import type for the Win32 handle. 
///< must be 0 (default) or a valid combination of ::ze_external_memory_type_flags_t void* handle; ///< [in][optional] the Win32 handle to import const void* name; ///< [in][optional] name of a memory object to import } ze_external_memory_import_win32_handle_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Exports an allocation as a Win32 handle /// /// @details /// - This structure may be passed to ::zeMemGetAllocProperties, via the /// `pNext` member of ::ze_memory_allocation_properties_t, to export a /// memory allocation as a Win32 handle. /// - This structure may be passed to ::zeImageGetAllocPropertiesExt, via /// the `pNext` member of ::ze_image_allocation_ext_properties_t, to /// export an image as a Win32 handle. /// - The requested memory export type must have been specified when the /// allocation was made. typedef struct _ze_external_memory_export_win32_handle_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_external_memory_type_flags_t flags; ///< [in] flags specifying the memory export type for the Win32 handle. ///< must be 0 (default) or a valid combination of ::ze_external_memory_type_flags_t void* handle; ///< [out] the exported Win32 handle representing the allocation. } ze_external_memory_export_win32_handle_t; /////////////////////////////////////////////////////////////////////////////// /// @brief atomic access attribute flags typedef uint32_t ze_memory_atomic_attr_exp_flags_t; typedef enum _ze_memory_atomic_attr_exp_flag_t { ZE_MEMORY_ATOMIC_ATTR_EXP_FLAG_NO_ATOMICS = ZE_BIT(0), ///< Atomics on the pointer are not allowed ZE_MEMORY_ATOMIC_ATTR_EXP_FLAG_NO_HOST_ATOMICS = ZE_BIT(1), ///< Host atomics on the pointer are not allowed ZE_MEMORY_ATOMIC_ATTR_EXP_FLAG_HOST_ATOMICS = ZE_BIT(2), ///< Host atomics on the pointer are allowed. 
Requires ///< ::ZE_MEMORY_ACCESS_CAP_FLAG_ATOMIC returned by ///< ::zeDeviceGetMemoryAccessProperties. ZE_MEMORY_ATOMIC_ATTR_EXP_FLAG_NO_DEVICE_ATOMICS = ZE_BIT(3), ///< Device atomics on the pointer are not allowed ZE_MEMORY_ATOMIC_ATTR_EXP_FLAG_DEVICE_ATOMICS = ZE_BIT(4), ///< Device atomics on the pointer are allowed. Requires ///< ::ZE_MEMORY_ACCESS_CAP_FLAG_ATOMIC returned by ///< ::zeDeviceGetMemoryAccessProperties. ZE_MEMORY_ATOMIC_ATTR_EXP_FLAG_NO_SYSTEM_ATOMICS = ZE_BIT(5), ///< Concurrent atomics on the pointer from both host and device are not ///< allowed ZE_MEMORY_ATOMIC_ATTR_EXP_FLAG_SYSTEM_ATOMICS = ZE_BIT(6), ///< Concurrent atomics on the pointer from both host and device are ///< allowed. Requires ::ZE_MEMORY_ACCESS_CAP_FLAG_CONCURRENT_ATOMIC ///< returned by ::zeDeviceGetMemoryAccessProperties. ZE_MEMORY_ATOMIC_ATTR_EXP_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_MEMORY_ATOMIC_ATTR_EXP_FLAG_* ENUMs } ze_memory_atomic_attr_exp_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Sets atomic access attributes for a shared allocation /// /// @details /// - If the shared-allocation is owned by multiple devices (i.e. nullptr /// was passed to ::zeMemAllocShared when creating it), then hDevice may be /// passed to set the attributes in that specific device. If nullptr is /// passed in hDevice, then the atomic attributes are set in all devices /// associated with the allocation. /// - If the atomic access attribute select is not supported by the driver, /// ::ZE_RESULT_ERROR_INVALID_ARGUMENT is returned. /// - The atomic access attribute may be only supported at a device-specific /// granularity, such as at a page boundary. In this case, the memory range /// may be expanded such that the start and end of the range satisfy granularity /// requirements. /// - When calling this function multiple times with different flags, only the /// attributes from last call are honored. 
/// - The application must not call this function for shared-allocations currently /// being used by the device. /// - The application must **not** call this function from simultaneous threads /// with the same pointer. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == ptr` /// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION /// + `0x7f < attr` ZE_APIEXPORT ze_result_t ZE_APICALL zeMemSetAtomicAccessAttributeExp( ze_context_handle_t hContext, ///< [in] handle of context ze_device_handle_t hDevice, ///< [in] device associated with the memory advice const void* ptr, ///< [in] Pointer to the start of the memory range size_t size, ///< [in] Size in bytes of the memory range ze_memory_atomic_attr_exp_flags_t attr ///< [in] Atomic access attributes to set for the specified range. ///< Must be 0 (default) or a valid combination of ::ze_memory_atomic_attr_exp_flag_t. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves the atomic access attributes previously set for a shared /// allocation /// /// @details /// - The application may call this function from simultaneous threads /// with the same pointer. /// - The implementation of this function should be lock-free. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == ptr` /// + `nullptr == pAttr` ZE_APIEXPORT ze_result_t ZE_APICALL zeMemGetAtomicAccessAttributeExp( ze_context_handle_t hContext, ///< [in] handle of context ze_device_handle_t hDevice, ///< [in] device associated with the memory advice const void* ptr, ///< [in] Pointer to the start of the memory range size_t size, ///< [in] Size in bytes of the memory range ze_memory_atomic_attr_exp_flags_t* pAttr ///< [out] Atomic access attributes for the specified range ); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero APIs for Module #if !defined(__GNUC__) #pragma region module #endif /////////////////////////////////////////////////////////////////////////////// /// @brief Supported module creation input formats typedef enum _ze_module_format_t { ZE_MODULE_FORMAT_IL_SPIRV = 0, ///< Format is SPIRV IL format ZE_MODULE_FORMAT_NATIVE = 1, ///< Format is device native format ZE_MODULE_FORMAT_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_MODULE_FORMAT_* ENUMs } ze_module_format_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Specialization constants - User defined constants typedef struct _ze_module_constants_t { uint32_t numConstants; ///< [in] Number of specialization constants. const uint32_t* pConstantIds; ///< [in][range(0, numConstants)] Array of IDs that is sized to ///< numConstants. const void** pConstantValues; ///< [in][range(0, numConstants)] Array of pointers to values that is sized ///< to numConstants. 
} ze_module_constants_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Module descriptor typedef struct _ze_module_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_module_format_t format; ///< [in] Module format passed in with pInputModule size_t inputSize; ///< [in] size of input IL or ISA from pInputModule. const uint8_t* pInputModule; ///< [in] pointer to IL or ISA const char* pBuildFlags; ///< [in][optional] string containing one or more (comma-separated) ///< compiler flags. If unsupported, flag is ignored with a warning. ///< - "-ze-opt-disable" ///< - Disable optimizations ///< - "-ze-opt-level" ///< - Specifies optimization level for compiler. Levels are ///< implementation specific. ///< - 0 is no optimizations (equivalent to -ze-opt-disable) ///< - 1 is optimize minimally (may be the same as 2) ///< - 2 is optimize more (default) ///< - "-ze-opt-greater-than-4GB-buffer-required" ///< - Use 64-bit offset calculations for buffers. ///< - "-ze-opt-large-register-file" ///< - Increase number of registers available to threads. ///< - "-ze-opt-has-buffer-offset-arg" ///< - Extend stateless to stateful optimization to more ///< cases with the use of additional offset (e.g. 64-bit ///< pointer to binding table with 32-bit offset). ///< - "-g" ///< - Include debugging information. const ze_module_constants_t* pConstants; ///< [in][optional] pointer to specialization constants. Valid only for ///< SPIR-V input. This must be set to nullptr if no specialization ///< constants are provided. } ze_module_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Creates a module on the context. /// /// @details /// - Compiles the module for execution on the device. 
/// - The application must only use the module for the device, or its /// sub-devices, which was provided during creation. /// - The module can be copied to other devices and contexts within the same /// driver instance by using ::zeModuleGetNativeBinary. /// - A build log can optionally be returned to the caller. The caller is /// responsible for destroying build log using ::zeModuleBuildLogDestroy. /// - The module descriptor constants are only supported for SPIR-V /// specialization constants. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == desc` /// + `nullptr == desc->pInputModule` /// + `nullptr == phModule` /// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION /// + `::ZE_MODULE_FORMAT_NATIVE < desc->format` /// - ::ZE_RESULT_ERROR_INVALID_NATIVE_BINARY /// - ::ZE_RESULT_ERROR_INVALID_SIZE /// + `0 == desc->inputSize` /// - ::ZE_RESULT_ERROR_MODULE_BUILD_FAILURE ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleCreate( ze_context_handle_t hContext, ///< [in] handle of the context object ze_device_handle_t hDevice, ///< [in] handle of the device const ze_module_desc_t* desc, ///< [in] pointer to module descriptor ze_module_handle_t* phModule, ///< [out] pointer to handle of module object created ze_module_build_log_handle_t* phBuildLog ///< [out][optional] pointer to handle of module's build log. 
); /////////////////////////////////////////////////////////////////////////////// /// @brief Destroys module /// /// @details /// - The application must destroy all kernel handles created from the /// module before destroying the module itself. /// - The application must ensure the device is not currently referencing /// the module before it is deleted. /// - The implementation of this function may immediately free all Host and /// Device allocations associated with this module. /// - The application must **not** call this function from simultaneous /// threads with the same module handle. /// - The implementation of this function must be thread-safe. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hModule` /// - ::ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleDestroy( ze_module_handle_t hModule ///< [in][release] handle of the module ); /////////////////////////////////////////////////////////////////////////////// /// @brief Dynamically link modules together that share import/export linkage /// dependencies. /// /// @details /// - Modules support SPIR-V import and export linkage types for functions /// and global variables. See the SPIR-V specification for linkage /// details. /// - Modules can have both import and export linkage. /// - Modules that do not have any imports or exports do not need to be /// linked. /// - All module import requirements must be satisfied via linking before /// kernel objects can be created from them. /// - Modules cannot be partially linked. Unsatisfiable import dependencies /// in the set of modules passed to ::zeModuleDynamicLink will result in /// ::ZE_RESULT_ERROR_MODULE_LINK_FAILURE being returned. /// - Modules will only be linked once. 
A module can be used in multiple /// link calls if it has exports but its imports will not be re-linked. /// - Ambiguous dependencies, where multiple modules satisfy the same import /// dependencies for a module, are not allowed. /// - The application must ensure the modules being linked were created on /// the same context. /// - The application may call this function from simultaneous threads as /// long as the import modules being linked are not the same. /// - ModuleGetNativeBinary can be called on any module regardless of /// whether it is linked or not. /// - A link log can optionally be returned to the caller. The caller is /// responsible for destroying the link log using /// ::zeModuleBuildLogDestroy. /// - The link log may contain a list of the unresolved import dependencies /// if present. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == phModules` /// - ::ZE_RESULT_ERROR_MODULE_LINK_FAILURE ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleDynamicLink( uint32_t numModules, ///< [in] number of modules to be linked pointed to by phModules. ze_module_handle_t* phModules, ///< [in][range(0, numModules)] pointer to an array of modules to ///< dynamically link together. ze_module_build_log_handle_t* phLinkLog ///< [out][optional] pointer to handle of dynamic link log. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Destroys module build log object /// /// @details /// - The implementation of this function may immediately free all Host /// allocations associated with this object. /// - The application must **not** call this function from simultaneous /// threads with the same build log handle. 
/// - The implementation of this function should be lock-free. /// - This function can be called before or after ::zeModuleDestroy for the /// associated module. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hModuleBuildLog` /// - ::ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleBuildLogDestroy( ze_module_build_log_handle_t hModuleBuildLog ///< [in][release] handle of the module build log object. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves text string for build log. /// /// @details /// - The caller can pass nullptr for pBuildLog when querying only for size. /// - The caller must provide memory for build log. /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hModuleBuildLog` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pSize` ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleBuildLogGetString( ze_module_build_log_handle_t hModuleBuildLog, ///< [in] handle of the module build log object. size_t* pSize, ///< [in,out] size of build log string. char* pBuildLog ///< [in,out][optional] pointer to null-terminated string of the log. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieve native binary from Module. /// /// @details /// - The native binary output can be cached to disk and new modules can be /// later constructed from the cached copy. 
/// - The native binary will retain debugging information that is associated /// with a module. /// - The caller can pass nullptr for pModuleNativeBinary when querying only /// for size. /// - The implementation will copy the native binary into a buffer supplied /// by the caller. /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hModule` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pSize` ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleGetNativeBinary( ze_module_handle_t hModule, ///< [in] handle of the module size_t* pSize, ///< [in,out] size of native binary in bytes. uint8_t* pModuleNativeBinary ///< [in,out][optional] byte pointer to native binary ); /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieve global variable pointer from Module. /// /// @details /// - The application may query global pointer from any module that either /// exports or imports it. /// - The application must dynamically link a module that imports a global /// before the global pointer can be queried from it. /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hModule` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pGlobalName` /// - ::ZE_RESULT_ERROR_INVALID_GLOBAL_NAME ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleGetGlobalPointer( ze_module_handle_t hModule, ///< [in] handle of the module const char* pGlobalName, ///< [in] name of global variable in module size_t* pSize, ///< [in,out][optional] size of global variable void** pptr ///< [in,out][optional] device visible pointer ); /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieve all kernel names in the module. /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hModule` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pCount` ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleGetKernelNames( ze_module_handle_t hModule, ///< [in] handle of the module uint32_t* pCount, ///< [in,out] pointer to the number of names. ///< if count is zero, then the driver shall update the value with the ///< total number of names available. ///< if count is greater than the number of names available, then the ///< driver shall update the value with the correct number of names available. const char** pNames ///< [in,out][optional][range(0, *pCount)] array of names of functions. ///< if count is less than the number of names available, then driver shall ///< only retrieve that number of names. 
); /////////////////////////////////////////////////////////////////////////////// /// @brief Supported module property flags typedef uint32_t ze_module_property_flags_t; typedef enum _ze_module_property_flag_t { ZE_MODULE_PROPERTY_FLAG_IMPORTS = ZE_BIT(0), ///< Module has imports (i.e. imported global variables and/or kernels). ///< See ::zeModuleDynamicLink. ZE_MODULE_PROPERTY_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_MODULE_PROPERTY_FLAG_* ENUMs } ze_module_property_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Module properties typedef struct _ze_module_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_module_property_flags_t flags; ///< [out] 0 (none) or a valid combination of ::ze_module_property_flag_t } ze_module_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieve module properties. /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hModule` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pModuleProperties` ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleGetProperties( ze_module_handle_t hModule, ///< [in] handle of the module ze_module_properties_t* pModuleProperties ///< [in,out] query result for module properties. 
); /////////////////////////////////////////////////////////////////////////////// /// @brief Supported kernel creation flags typedef uint32_t ze_kernel_flags_t; typedef enum _ze_kernel_flag_t { ZE_KERNEL_FLAG_FORCE_RESIDENCY = ZE_BIT(0), ///< force all device allocations to be resident during execution ZE_KERNEL_FLAG_EXPLICIT_RESIDENCY = ZE_BIT(1), ///< application is responsible for all residency of device allocations. ///< driver may disable implicit residency management. ZE_KERNEL_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_KERNEL_FLAG_* ENUMs } ze_kernel_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Kernel descriptor typedef struct _ze_kernel_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_kernel_flags_t flags; ///< [in] creation flags. ///< must be 0 (default) or a valid combination of ::ze_kernel_flag_t; ///< default behavior may use driver-based residency. const char* pKernelName; ///< [in] null-terminated name of kernel in module } ze_kernel_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Create a kernel from the module. /// /// @details /// - Modules that have unresolved imports need to be dynamically linked /// before a kernel can be created from them. (See ::zeModuleDynamicLink) /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hModule` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == desc` /// + `nullptr == desc->pKernelName` /// + `nullptr == phKernel` /// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION /// + `0x3 < desc->flags` /// - ::ZE_RESULT_ERROR_INVALID_KERNEL_NAME /// - ::ZE_RESULT_ERROR_INVALID_MODULE_UNLINKED ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelCreate( ze_module_handle_t hModule, ///< [in] handle of the module const ze_kernel_desc_t* desc, ///< [in] pointer to kernel descriptor ze_kernel_handle_t* phKernel ///< [out] handle of the Function object ); /////////////////////////////////////////////////////////////////////////////// /// @brief Destroys a kernel object /// /// @details /// - The application must ensure the device is not currently referencing /// the kernel before it is deleted. /// - The implementation of this function may immediately free all Host and /// Device allocations associated with this kernel. /// - The application must **not** call this function from simultaneous /// threads with the same kernel handle. /// - The implementation of this function must be thread-safe. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hKernel` /// - ::ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelDestroy( ze_kernel_handle_t hKernel ///< [in][release] handle of the kernel object ); /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieve a function pointer from a module by name /// /// @details /// - The function pointer is unique for the device on which the module was /// created. /// - The function pointer is no longer valid if module is destroyed. /// - The function name should only refer to callable functions within the /// module. /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hModule` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pFunctionName` /// + `nullptr == pfnFunction` /// - ::ZE_RESULT_ERROR_INVALID_FUNCTION_NAME ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleGetFunctionPointer( ze_module_handle_t hModule, ///< [in] handle of the module const char* pFunctionName, ///< [in] Name of function to retrieve function pointer for. void** pfnFunction ///< [out] pointer to function. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Set group size for a kernel. /// /// @details /// - The group size will be used when a ::zeCommandListAppendLaunchKernel /// variant is called. 
/// - The application must **not** call this function from simultaneous /// threads with the same kernel handle. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hKernel` /// - ::ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelSetGroupSize( ze_kernel_handle_t hKernel, ///< [in] handle of the kernel object uint32_t groupSizeX, ///< [in] group size for X dimension to use for this kernel uint32_t groupSizeY, ///< [in] group size for Y dimension to use for this kernel uint32_t groupSizeZ ///< [in] group size for Z dimension to use for this kernel ); /////////////////////////////////////////////////////////////////////////////// /// @brief Query a suggested group size for a kernel given a global size for each /// dimension. /// /// @details /// - This function ignores the group size that is set using /// ::zeKernelSetGroupSize. /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hKernel` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == groupSizeX` /// + `nullptr == groupSizeY` /// + `nullptr == groupSizeZ` /// - ::ZE_RESULT_ERROR_INVALID_GLOBAL_WIDTH_DIMENSION ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelSuggestGroupSize( ze_kernel_handle_t hKernel, ///< [in] handle of the kernel object uint32_t globalSizeX, ///< [in] global width for X dimension uint32_t globalSizeY, ///< [in] global width for Y dimension uint32_t globalSizeZ, ///< [in] global width for Z dimension uint32_t* groupSizeX, ///< [out] recommended size of group for X dimension uint32_t* groupSizeY, ///< [out] recommended size of group for Y dimension uint32_t* groupSizeZ ///< [out] recommended size of group for Z dimension ); /////////////////////////////////////////////////////////////////////////////// /// @brief Query a suggested max group count for a cooperative kernel. /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hKernel` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == totalGroupCount` ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelSuggestMaxCooperativeGroupCount( ze_kernel_handle_t hKernel, ///< [in] handle of the kernel object uint32_t* totalGroupCount ///< [out] recommended total group count. 
); /////////////////////////////////////////////////////////////////////////////// /// @brief Set kernel argument for a kernel. /// /// @details /// - The argument values will be used when a /// ::zeCommandListAppendLaunchKernel variant is called. /// - The application must **not** call this function from simultaneous /// threads with the same kernel handle. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hKernel` /// - ::ZE_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX /// - ::ZE_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelSetArgumentValue( ze_kernel_handle_t hKernel, ///< [in] handle of the kernel object uint32_t argIndex, ///< [in] argument index in range [0, num args - 1] size_t argSize, ///< [in] size of argument type const void* pArgValue ///< [in][optional] argument value represented as matching arg type. If ///< null then argument value is considered null. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Kernel indirect access flags typedef uint32_t ze_kernel_indirect_access_flags_t; typedef enum _ze_kernel_indirect_access_flag_t { ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST = ZE_BIT(0), ///< Indicates that the kernel accesses host allocations indirectly. ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE = ZE_BIT(1), ///< Indicates that the kernel accesses device allocations indirectly. ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED = ZE_BIT(2), ///< Indicates that the kernel accesses shared allocations indirectly. 
ZE_KERNEL_INDIRECT_ACCESS_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_KERNEL_INDIRECT_ACCESS_FLAG_* ENUMs } ze_kernel_indirect_access_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Sets kernel indirect access flags. /// /// @details /// - The application should specify which allocations will be indirectly /// accessed by the kernel to allow driver to optimize which allocations /// are made resident /// - This function may **not** be called from simultaneous threads with the /// same Kernel handle. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hKernel` /// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION /// + `0x7 < flags` ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelSetIndirectAccess( ze_kernel_handle_t hKernel, ///< [in] handle of the kernel object ze_kernel_indirect_access_flags_t flags ///< [in] kernel indirect access flags ); /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieve kernel indirect access flags. /// /// @details /// - This function may be called from simultaneous threads with the same /// Kernel handle. /// - The implementation of this function should be lock-free. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hKernel` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pFlags` ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelGetIndirectAccess( ze_kernel_handle_t hKernel, ///< [in] handle of the kernel object ze_kernel_indirect_access_flags_t* pFlags ///< [out] query result for kernel indirect access flags. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieve all declared kernel attributes (i.e. can be specified with /// __attribute__ in runtime language). /// /// @details /// - This function may be called from simultaneous threads with the same /// Kernel handle. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hKernel` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pSize` ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelGetSourceAttributes( ze_kernel_handle_t hKernel, ///< [in] handle of the kernel object uint32_t* pSize, ///< [in,out] pointer to size of string in bytes, including ///< null-terminating character. char** pString ///< [in,out][optional] pointer to application-managed character array ///< (string data). ///< If NULL, the string length of the kernel source attributes, including ///< a null-terminating character, is returned in pSize. 
Otherwise, pString ///< must point to valid application memory that is greater than or equal ///< to *pSize bytes in length, and on return the pointed-to string will ///< contain a space-separated list of kernel source attributes. Note: This ///< API was originally intended to ship with a char *pString, however this ///< typo was introduced. Thus the API has to stay this way for backwards ///< compatible reasons. It can be corrected in v2.0. Suggestion is to ///< create your own char *pString and then pass to this API with &pString. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Supported Cache Config flags typedef uint32_t ze_cache_config_flags_t; typedef enum _ze_cache_config_flag_t { ZE_CACHE_CONFIG_FLAG_LARGE_SLM = ZE_BIT(0), ///< Large SLM size ZE_CACHE_CONFIG_FLAG_LARGE_DATA = ZE_BIT(1), ///< Large General Data size ZE_CACHE_CONFIG_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_CACHE_CONFIG_FLAG_* ENUMs } ze_cache_config_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Sets the preferred cache configuration. /// /// @details /// - The cache configuration will be used when a /// ::zeCommandListAppendLaunchKernel variant is called. /// - The application must **not** call this function from simultaneous /// threads with the same kernel handle. /// - The implementation of this function should be lock-free. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hKernel` /// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION /// + `0x3 < flags` /// - ::ZE_RESULT_ERROR_UNSUPPORTED_FEATURE ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelSetCacheConfig( ze_kernel_handle_t hKernel, ///< [in] handle of the kernel object ze_cache_config_flags_t flags ///< [in] cache configuration. ///< must be 0 (default configuration) or a valid combination of ::ze_cache_config_flag_t. ); /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_MAX_KERNEL_UUID_SIZE /// @brief Maximum kernel universal unique id (UUID) size in bytes #define ZE_MAX_KERNEL_UUID_SIZE 16 #endif // ZE_MAX_KERNEL_UUID_SIZE /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_MAX_MODULE_UUID_SIZE /// @brief Maximum module universal unique id (UUID) size in bytes #define ZE_MAX_MODULE_UUID_SIZE 16 #endif // ZE_MAX_MODULE_UUID_SIZE /////////////////////////////////////////////////////////////////////////////// /// @brief Kernel universal unique id (UUID) typedef struct _ze_kernel_uuid_t { uint8_t kid[ZE_MAX_KERNEL_UUID_SIZE]; ///< [out] opaque data representing a kernel UUID uint8_t mid[ZE_MAX_MODULE_UUID_SIZE]; ///< [out] opaque data representing the kernel's module UUID } ze_kernel_uuid_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Kernel properties typedef struct _ze_kernel_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). uint32_t numKernelArgs; ///< [out] number of kernel arguments. 
uint32_t requiredGroupSizeX; ///< [out] required group size in the X dimension, ///< or zero if there is no required group size uint32_t requiredGroupSizeY; ///< [out] required group size in the Y dimension, ///< or zero if there is no required group size uint32_t requiredGroupSizeZ; ///< [out] required group size in the Z dimension, ///< or zero if there is no required group size uint32_t requiredNumSubGroups; ///< [out] required number of subgroups per thread group, ///< or zero if there is no required number of subgroups uint32_t requiredSubgroupSize; ///< [out] required subgroup size, ///< or zero if there is no required subgroup size uint32_t maxSubgroupSize; ///< [out] maximum subgroup size uint32_t maxNumSubgroups; ///< [out] maximum number of subgroups per thread group uint32_t localMemSize; ///< [out] local memory size used by each thread group uint32_t privateMemSize; ///< [out] private memory size allocated by compiler used by each thread uint32_t spillMemSize; ///< [out] spill memory size allocated by compiler ze_kernel_uuid_t uuid; ///< [out] universal unique identifier. } ze_kernel_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Additional kernel preferred group size properties /// /// @details /// - This structure may be passed to ::zeKernelGetProperties, via the /// `pNext` member of ::ze_kernel_properties_t, to query additional kernel /// preferred group size properties. typedef struct _ze_kernel_preferred_group_size_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). uint32_t preferredMultiple; ///< [out] preferred group size multiple } ze_kernel_preferred_group_size_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieve kernel properties. 
/// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hKernel` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pKernelProperties` ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelGetProperties( ze_kernel_handle_t hKernel, ///< [in] handle of the kernel object ze_kernel_properties_t* pKernelProperties ///< [in,out] query result for kernel properties. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieve kernel name from Kernel. /// /// @details /// - The caller can pass nullptr for pName when querying only for size. /// - The implementation will copy the kernel name into a buffer supplied by /// the caller. /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hKernel` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pSize` ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelGetName( ze_kernel_handle_t hKernel, ///< [in] handle of the kernel object size_t* pSize, ///< [in,out] size of kernel name string, including null terminator, in ///< bytes. char* pName ///< [in,out][optional] char pointer to kernel name. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Kernel dispatch group count. 
typedef struct _ze_group_count_t { uint32_t groupCountX; ///< [in] number of thread groups in X dimension uint32_t groupCountY; ///< [in] number of thread groups in Y dimension uint32_t groupCountZ; ///< [in] number of thread groups in Z dimension } ze_group_count_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Launch kernel over one or more work groups. /// /// @details /// - The application must ensure the kernel and events are accessible by /// the device on which the command list was created. /// - This may **only** be called for a command list created with command /// queue group ordinal that supports compute. /// - The application must ensure the command list, kernel and events were /// created on the same context. /// - This function may **not** be called from simultaneous threads with the /// same command list handle. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// + `nullptr == hKernel` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pLaunchFuncArgs` /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT /// - ::ZE_RESULT_ERROR_INVALID_SIZE /// + `(nullptr == phWaitEvents) && (0 < numWaitEvents)` ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchKernel( ze_command_list_handle_t hCommandList, ///< [in] handle of the command list ze_kernel_handle_t hKernel, ///< [in] handle of the kernel object const ze_group_count_t* pLaunchFuncArgs, ///< [in] thread group launch arguments ze_event_handle_t hSignalEvent, ///< [in][optional] handle of the event to signal on completion uint32_t numWaitEvents, ///< [in][optional] number of events to wait on before launching; must be 0 ///< if `nullptr == 
phWaitEvents` ze_event_handle_t* phWaitEvents ///< [in][optional][range(0, numWaitEvents)] handle of the events to wait ///< on before launching ); /////////////////////////////////////////////////////////////////////////////// /// @brief Launch kernel cooperatively over one or more work groups. /// /// @details /// - The application must ensure the kernel and events are accessible by /// the device on which the command list was created. /// - This may **only** be called for a command list created with command /// queue group ordinal that supports compute. /// - This may only be used for a command list that are submitted to command /// queue with cooperative flag set. /// - The application must ensure the command list, kernel and events were /// created on the same context. /// - This function may **not** be called from simultaneous threads with the /// same command list handle. /// - The implementation of this function should be lock-free. /// - Use ::zeKernelSuggestMaxCooperativeGroupCount to recommend max group /// count for device for cooperative functions that device supports. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// + `nullptr == hKernel` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pLaunchFuncArgs` /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT /// - ::ZE_RESULT_ERROR_INVALID_SIZE /// + `(nullptr == phWaitEvents) && (0 < numWaitEvents)` ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchCooperativeKernel( ze_command_list_handle_t hCommandList, ///< [in] handle of the command list ze_kernel_handle_t hKernel, ///< [in] handle of the kernel object const ze_group_count_t* pLaunchFuncArgs, ///< [in] thread group launch arguments ze_event_handle_t hSignalEvent, ///< [in][optional] handle of the event to signal on completion uint32_t numWaitEvents, ///< [in][optional] number of events to wait on before launching; must be 0 ///< if `nullptr == phWaitEvents` ze_event_handle_t* phWaitEvents ///< [in][optional][range(0, numWaitEvents)] handle of the events to wait ///< on before launching ); /////////////////////////////////////////////////////////////////////////////// /// @brief Launch kernel over one or more work groups using indirect arguments. /// /// @details /// - The application must ensure the kernel and events are accessible by /// the device on which the command list was created. /// - The application must ensure the launch arguments are visible to the /// device on which the command list was created. /// - The implementation must not access the contents of the launch /// arguments as they are free to be modified by either the Host or device /// up until execution. /// - This may **only** be called for a command list created with command /// queue group ordinal that supports compute. 
/// - The application must ensure the command list, kernel and events were /// created, and the memory was allocated, on the same context. /// - This function may **not** be called from simultaneous threads with the /// same command list handle. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// + `nullptr == hKernel` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pLaunchArgumentsBuffer` /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT /// - ::ZE_RESULT_ERROR_INVALID_SIZE /// + `(nullptr == phWaitEvents) && (0 < numWaitEvents)` ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchKernelIndirect( ze_command_list_handle_t hCommandList, ///< [in] handle of the command list ze_kernel_handle_t hKernel, ///< [in] handle of the kernel object const ze_group_count_t* pLaunchArgumentsBuffer, ///< [in] pointer to device buffer that will contain thread group launch ///< arguments ze_event_handle_t hSignalEvent, ///< [in][optional] handle of the event to signal on completion uint32_t numWaitEvents, ///< [in][optional] number of events to wait on before launching; must be 0 ///< if `nullptr == phWaitEvents` ze_event_handle_t* phWaitEvents ///< [in][optional][range(0, numWaitEvents)] handle of the events to wait ///< on before launching ); /////////////////////////////////////////////////////////////////////////////// /// @brief Launch multiple kernels over one or more work groups using an array of /// indirect arguments. /// /// @details /// - The application must ensure the kernel and events are accessible by /// the device on which the command list was created. 
/// - The application must ensure the array of launch arguments and count /// buffer are visible to the device on which the command list was /// created. /// - The implementation must not access the contents of the array of launch /// arguments or count buffer as they are free to be modified by either /// the Host or device up until execution. /// - This may **only** be called for a command list created with command /// queue group ordinal that supports compute. /// - The application must ensure the command list, kernel and events were /// created, and the memory was allocated, on the same context. /// - This function may **not** be called from simultaneous threads with the /// same command list handle. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == phKernels` /// + `nullptr == pCountBuffer` /// + `nullptr == pLaunchArgumentsBuffer` /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT /// - ::ZE_RESULT_ERROR_INVALID_SIZE /// + `(nullptr == phWaitEvents) && (0 < numWaitEvents)` ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchMultipleKernelsIndirect( ze_command_list_handle_t hCommandList, ///< [in] handle of the command list uint32_t numKernels, ///< [in] maximum number of kernels to launch ze_kernel_handle_t* phKernels, ///< [in][range(0, numKernels)] handles of the kernel objects const uint32_t* pCountBuffer, ///< [in] pointer to device memory location that will contain the actual ///< number of kernels to launch; value must be less than or equal to ///< numKernels const ze_group_count_t* pLaunchArgumentsBuffer, ///< [in][range(0, numKernels)] pointer to device buffer that will contain ///< a 
contiguous array of thread group launch arguments ze_event_handle_t hSignalEvent, ///< [in][optional] handle of the event to signal on completion uint32_t numWaitEvents, ///< [in][optional] number of events to wait on before launching; must be 0 ///< if `nullptr == phWaitEvents` ze_event_handle_t* phWaitEvents ///< [in][optional][range(0, numWaitEvents)] handle of the events to wait ///< on before launching ); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension for supporting module programs. #if !defined(__GNUC__) #pragma region program #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_MODULE_PROGRAM_EXP_NAME /// @brief Module Program Extension Name #define ZE_MODULE_PROGRAM_EXP_NAME "ZE_experimental_module_program" #endif // ZE_MODULE_PROGRAM_EXP_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Module Program Extension Version(s) typedef enum _ze_module_program_exp_version_t { ZE_MODULE_PROGRAM_EXP_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_MODULE_PROGRAM_EXP_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_MODULE_PROGRAM_EXP_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_MODULE_PROGRAM_EXP_VERSION_* ENUMs } ze_module_program_exp_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Module extended descriptor to support multiple input modules. /// /// @details /// - Implementation must support ::ZE_experimental_module_program extension /// - Modules support import and export linkage for functions and global /// variables. /// - SPIR-V import and export linkage types are used. See SPIR-V /// specification for linkage details. /// - pInputModules, pBuildFlags, and pConstants from ::ze_module_desc_t is /// ignored. /// - Format in ::ze_module_desc_t needs to be set to /// ::ZE_MODULE_FORMAT_IL_SPIRV. 
typedef struct _ze_module_program_exp_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). uint32_t count; ///< [in] Count of input modules const size_t* inputSizes; ///< [in][range(0, count)] sizes of each input IL module in pInputModules. const uint8_t** pInputModules; ///< [in][range(0, count)] pointer to an array of IL (e.g. SPIR-V modules). ///< Valid only for SPIR-V input. const char** pBuildFlags; ///< [in][optional][range(0, count)] array of strings containing build ///< flags. See pBuildFlags in ::ze_module_desc_t. const ze_module_constants_t** pConstants; ///< [in][optional][range(0, count)] pointer to array of specialization ///< constant strings. Valid only for SPIR-V input. This must be set to ///< nullptr if no specialization constants are provided. } ze_module_program_exp_desc_t; #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension APIs for Raytracing #if !defined(__GNUC__) #pragma region raytracing #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_RAYTRACING_EXT_NAME /// @brief Raytracing Extension Name #define ZE_RAYTRACING_EXT_NAME "ZE_extension_raytracing" #endif // ZE_RAYTRACING_EXT_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Raytracing Extension Version(s) typedef enum _ze_raytracing_ext_version_t { ZE_RAYTRACING_EXT_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_RAYTRACING_EXT_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_RAYTRACING_EXT_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_RAYTRACING_EXT_VERSION_* ENUMs } ze_raytracing_ext_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Supported raytracing capability flags typedef uint32_t 
ze_device_raytracing_ext_flags_t; typedef enum _ze_device_raytracing_ext_flag_t { ZE_DEVICE_RAYTRACING_EXT_FLAG_RAYQUERY = ZE_BIT(0), ///< Supports rayquery ZE_DEVICE_RAYTRACING_EXT_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_DEVICE_RAYTRACING_EXT_FLAG_* ENUMs } ze_device_raytracing_ext_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Raytracing properties queried using ::zeDeviceGetModuleProperties /// /// @details /// - This structure may be returned from ::zeDeviceGetModuleProperties, via /// the `pNext` member of ::ze_device_module_properties_t. typedef struct _ze_device_raytracing_ext_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_device_raytracing_ext_flags_t flags; ///< [out] 0 or a valid combination of ::ze_device_raytracing_ext_flags_t uint32_t maxBVHLevels; ///< [out] Maximum number of BVH levels supported } ze_device_raytracing_ext_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Supported raytracing memory allocation flags typedef uint32_t ze_raytracing_mem_alloc_ext_flags_t; typedef enum _ze_raytracing_mem_alloc_ext_flag_t { ZE_RAYTRACING_MEM_ALLOC_EXT_FLAG_TBD = ZE_BIT(0), ///< reserved for future use ZE_RAYTRACING_MEM_ALLOC_EXT_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_RAYTRACING_MEM_ALLOC_EXT_FLAG_* ENUMs } ze_raytracing_mem_alloc_ext_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Raytracing memory allocation descriptor /// /// @details /// - This structure must be passed to ::zeMemAllocShared or /// ::zeMemAllocDevice, via the `pNext` member of /// ::ze_device_mem_alloc_desc_t, for any memory allocation that is to be /// accessed by raytracing fixed-function of the device. 
typedef struct _ze_raytracing_mem_alloc_ext_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_raytracing_mem_alloc_ext_flags_t flags; ///< [in] flags specifying additional allocation controls. ///< must be 0 (default) or a valid combination of ::ze_raytracing_mem_alloc_ext_flag_t; ///< default behavior may use implicit driver-based heuristics. } ze_raytracing_mem_alloc_ext_desc_t; #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero APIs for Memory Residency #if !defined(__GNUC__) #pragma region residency #endif /////////////////////////////////////////////////////////////////////////////// /// @brief Makes memory resident for the device. /// /// @details /// - The application must ensure the memory is resident before being /// referenced by the device /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == ptr` /// - ::ZE_RESULT_ERROR_INVALID_ARGUMENT /// + ptr is not recognized by the implementation ZE_APIEXPORT ze_result_t ZE_APICALL zeContextMakeMemoryResident( ze_context_handle_t hContext, ///< [in] handle of context object ze_device_handle_t hDevice, ///< [in] handle of the device void* ptr, ///< [in] pointer to memory to make resident size_t size ///< [in] size in bytes to make resident ); /////////////////////////////////////////////////////////////////////////////// /// @brief Allows memory to be evicted from the device. 
/// /// @details /// - The application must ensure the device is not currently referencing /// the memory before it is evicted /// - The application may free the memory without evicting; the memory is /// implicitly evicted when freed. /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == ptr` ZE_APIEXPORT ze_result_t ZE_APICALL zeContextEvictMemory( ze_context_handle_t hContext, ///< [in] handle of context object ze_device_handle_t hDevice, ///< [in] handle of the device void* ptr, ///< [in] pointer to memory to evict size_t size ///< [in] size in bytes to evict ); /////////////////////////////////////////////////////////////////////////////// /// @brief Makes image resident for the device. /// /// @details /// - The application must ensure the image is resident before being /// referenced by the device /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// + `nullptr == hDevice` /// + `nullptr == hImage` ZE_APIEXPORT ze_result_t ZE_APICALL zeContextMakeImageResident( ze_context_handle_t hContext, ///< [in] handle of context object ze_device_handle_t hDevice, ///< [in] handle of the device ze_image_handle_t hImage ///< [in] handle of image to make resident ); /////////////////////////////////////////////////////////////////////////////// /// @brief Allows image to be evicted from the device. /// /// @details /// - The application must ensure the device is not currently referencing /// the image before it is evicted /// - The application may destroy the image without evicting; the image is /// implicitly evicted when destroyed. /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// + `nullptr == hDevice` /// + `nullptr == hImage` ZE_APIEXPORT ze_result_t ZE_APICALL zeContextEvictImage( ze_context_handle_t hContext, ///< [in] handle of context object ze_device_handle_t hDevice, ///< [in] handle of the device ze_image_handle_t hImage ///< [in] handle of image to be evicted ); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero APIs for Sampler #if !defined(__GNUC__) #pragma region sampler #endif /////////////////////////////////////////////////////////////////////////////// /// @brief Sampler addressing modes typedef enum _ze_sampler_address_mode_t { ZE_SAMPLER_ADDRESS_MODE_NONE = 0, ///< No coordinate modifications for out-of-bounds image access. ZE_SAMPLER_ADDRESS_MODE_REPEAT = 1, ///< Out-of-bounds coordinates are wrapped back around. ZE_SAMPLER_ADDRESS_MODE_CLAMP = 2, ///< Out-of-bounds coordinates are clamped to edge. ZE_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER = 3, ///< Out-of-bounds coordinates are clamped to border color which is (0.0f, ///< 0.0f, 0.0f, 0.0f) if image format swizzle contains alpha, otherwise ///< (0.0f, 0.0f, 0.0f, 1.0f). ZE_SAMPLER_ADDRESS_MODE_MIRROR = 4, ///< Out-of-bounds coordinates are mirrored starting from edge. ZE_SAMPLER_ADDRESS_MODE_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_SAMPLER_ADDRESS_MODE_* ENUMs } ze_sampler_address_mode_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Sampler filtering modes typedef enum _ze_sampler_filter_mode_t { ZE_SAMPLER_FILTER_MODE_NEAREST = 0, ///< No coordinate modifications for out of bounds image access. ZE_SAMPLER_FILTER_MODE_LINEAR = 1, ///< Out-of-bounds coordinates are wrapped back around. 
ZE_SAMPLER_FILTER_MODE_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_SAMPLER_FILTER_MODE_* ENUMs } ze_sampler_filter_mode_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Sampler descriptor typedef struct _ze_sampler_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_sampler_address_mode_t addressMode; ///< [in] Sampler addressing mode to determine how out-of-bounds ///< coordinates are handled. ze_sampler_filter_mode_t filterMode; ///< [in] Sampler filter mode to determine how samples are filtered. ze_bool_t isNormalized; ///< [in] Are coordinates normalized [0, 1] or not. } ze_sampler_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Creates sampler on the context. /// /// @details /// - The application must only use the sampler for the device, or its /// sub-devices, which was provided during creation. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == desc` /// + `nullptr == phSampler` /// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION /// + `::ZE_SAMPLER_ADDRESS_MODE_MIRROR < desc->addressMode` /// + `::ZE_SAMPLER_FILTER_MODE_LINEAR < desc->filterMode` ZE_APIEXPORT ze_result_t ZE_APICALL zeSamplerCreate( ze_context_handle_t hContext, ///< [in] handle of the context object ze_device_handle_t hDevice, ///< [in] handle of the device const ze_sampler_desc_t* desc, ///< [in] pointer to sampler descriptor ze_sampler_handle_t* phSampler ///< [out] handle of the sampler ); /////////////////////////////////////////////////////////////////////////////// /// @brief Destroys sampler object /// /// @details /// - The application must ensure the device is not currently referencing /// the sampler before it is deleted. /// - The implementation of this function may immediately free all Host and /// Device allocations associated with this sampler. /// - The application must **not** call this function from simultaneous /// threads with the same sampler handle. /// - The implementation of this function must be thread-safe. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hSampler` /// - ::ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE ZE_APIEXPORT ze_result_t ZE_APICALL zeSamplerDestroy( ze_sampler_handle_t hSampler ///< [in][release] handle of the sampler ); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero APIs for Virtual Memory Management #if !defined(__GNUC__) #pragma region virtual #endif /////////////////////////////////////////////////////////////////////////////// /// @brief Virtual memory page access attributes typedef enum _ze_memory_access_attribute_t { ZE_MEMORY_ACCESS_ATTRIBUTE_NONE = 0, ///< Indicates the memory page is inaccessible. ZE_MEMORY_ACCESS_ATTRIBUTE_READWRITE = 1, ///< Indicates the memory page supports read write access. ZE_MEMORY_ACCESS_ATTRIBUTE_READONLY = 2, ///< Indicates the memory page supports read-only access. ZE_MEMORY_ACCESS_ATTRIBUTE_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_MEMORY_ACCESS_ATTRIBUTE_* ENUMs } ze_memory_access_attribute_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Reserves pages in virtual address space. /// /// @details /// - The application must only use the memory allocation on the context for /// which it was created. /// - The starting address and size must be page aligned. See /// ::zeVirtualMemQueryPageSize. /// - If pStart is not null then implementation will attempt to reserve /// starting from that address. If not available then will find another /// suitable starting address. /// - The application may call this function from simultaneous threads. /// - The access attributes will default to none to indicate reservation is /// inaccessible. /// - The implementation of this function must be thread-safe. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pptr` /// - ::ZE_RESULT_ERROR_UNSUPPORTED_SIZE /// + `0 == size` ZE_APIEXPORT ze_result_t ZE_APICALL zeVirtualMemReserve( ze_context_handle_t hContext, ///< [in] handle of the context object const void* pStart, ///< [in][optional] pointer to start of region to reserve. If nullptr then ///< implementation will choose a start address. size_t size, ///< [in] size in bytes to reserve; must be page aligned. void** pptr ///< [out] pointer to virtual reservation. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Free pages in a reserved virtual address range. /// /// @details /// - Any existing virtual mappings for the range will be unmapped. /// - Physical allocations objects that were mapped to this range will not /// be destroyed. These need to be destroyed explicitly. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == ptr` /// - ::ZE_RESULT_ERROR_UNSUPPORTED_SIZE /// + `0 == size` /// - ::ZE_RESULT_ERROR_UNSUPPORTED_ALIGNMENT ZE_APIEXPORT ze_result_t ZE_APICALL zeVirtualMemFree( ze_context_handle_t hContext, ///< [in] handle of the context object const void* ptr, ///< [in] pointer to start of region to free. size_t size ///< [in] size in bytes to free; must be page aligned. 
); /////////////////////////////////////////////////////////////////////////////// /// @brief Queries page size to use for aligning virtual memory reservations and /// physical memory allocations. /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pagesize` /// - ::ZE_RESULT_ERROR_UNSUPPORTED_SIZE /// + `0 == size` ZE_APIEXPORT ze_result_t ZE_APICALL zeVirtualMemQueryPageSize( ze_context_handle_t hContext, ///< [in] handle of the context object ze_device_handle_t hDevice, ///< [in] handle of the device object size_t size, ///< [in] unaligned allocation size in bytes size_t* pagesize ///< [out] pointer to page size to use for start address and size ///< alignments. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Supported physical memory creation flags typedef uint32_t ze_physical_mem_flags_t; typedef enum _ze_physical_mem_flag_t { ZE_PHYSICAL_MEM_FLAG_ALLOCATE_ON_DEVICE = ZE_BIT(0), ///< [default] allocate physical device memory. ZE_PHYSICAL_MEM_FLAG_ALLOCATE_ON_HOST = ZE_BIT(1), ///< Allocate physical host memory instead. 
ZE_PHYSICAL_MEM_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_PHYSICAL_MEM_FLAG_* ENUMs } ze_physical_mem_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Physical memory descriptor typedef struct _ze_physical_mem_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_physical_mem_flags_t flags; ///< [in] creation flags. ///< must be 0 (default) or a valid combination of ///< ::ze_physical_mem_flag_t; default is to create physical device memory. size_t size; ///< [in] size in bytes to reserve; must be page aligned. } ze_physical_mem_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Creates a physical memory object for the context. /// /// @details /// - The application must only use the physical memory object on the /// context for which it was created. /// - The size must be page aligned. For host memory, the operating system /// page size should be used. For device memory, see /// ::zeVirtualMemQueryPageSize. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == desc` /// + `nullptr == phPhysicalMemory` /// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION /// + `0x3 < desc->flags` /// - ::ZE_RESULT_ERROR_UNSUPPORTED_SIZE /// + `0 == desc->size` /// - ::ZE_RESULT_ERROR_UNSUPPORTED_ALIGNMENT ZE_APIEXPORT ze_result_t ZE_APICALL zePhysicalMemCreate( ze_context_handle_t hContext, ///< [in] handle of the context object ze_device_handle_t hDevice, ///< [in] handle of the device object, can be `nullptr` if creating ///< physical host memory. ze_physical_mem_desc_t* desc, ///< [in] pointer to physical memory descriptor. ze_physical_mem_handle_t* phPhysicalMemory ///< [out] pointer to handle of physical memory object created ); /////////////////////////////////////////////////////////////////////////////// /// @brief Destroys a physical memory object. /// /// @details /// - The application must ensure the device is not currently referencing /// the physical memory object before it is deleted /// - The application must **not** call this function from simultaneous /// threads with the same physical memory handle. /// - The implementation of this function must be thread-safe. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// + `nullptr == hPhysicalMemory` /// - ::ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE ZE_APIEXPORT ze_result_t ZE_APICALL zePhysicalMemDestroy( ze_context_handle_t hContext, ///< [in] handle of the context object ze_physical_mem_handle_t hPhysicalMemory ///< [in][release] handle of physical memory object to destroy ); /////////////////////////////////////////////////////////////////////////////// /// @brief Maps pages in virtual address space to pages from physical memory /// object. /// /// @details /// - The virtual address range must have been reserved using /// ::zeVirtualMemReserve. /// - The application must only use the mapped memory allocation on the /// context for which it was created. /// - The virtual start address and size must be page aligned. See /// ::zeVirtualMemQueryPageSize. /// - The application should use, for the starting address and size, the /// same size alignment used for the physical allocation. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// + `nullptr == hPhysicalMemory` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == ptr` /// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION /// + `::ZE_MEMORY_ACCESS_ATTRIBUTE_READONLY < access` /// - ::ZE_RESULT_ERROR_UNSUPPORTED_SIZE /// + `0 == size` /// - ::ZE_RESULT_ERROR_UNSUPPORTED_ALIGNMENT ZE_APIEXPORT ze_result_t ZE_APICALL zeVirtualMemMap( ze_context_handle_t hContext, ///< [in] handle of the context object const void* ptr, ///< [in] pointer to start of virtual address range to map. size_t size, ///< [in] size in bytes of virtual address range to map; must be page ///< aligned. ze_physical_mem_handle_t hPhysicalMemory, ///< [in] handle to physical memory object. size_t offset, ///< [in] offset into physical memory allocation object; must be page ///< aligned. ze_memory_access_attribute_t access ///< [in] specifies page access attributes to apply to the virtual address ///< range. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Unmaps pages in virtual address space from pages from a physical /// memory object. /// /// @details /// - The page access attributes for virtual address range will revert back /// to none. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == ptr` /// - ::ZE_RESULT_ERROR_UNSUPPORTED_ALIGNMENT /// + Address must be page aligned /// - ::ZE_RESULT_ERROR_UNSUPPORTED_SIZE /// + `0 == size` /// + Size must be page aligned ZE_APIEXPORT ze_result_t ZE_APICALL zeVirtualMemUnmap( ze_context_handle_t hContext, ///< [in] handle of the context object const void* ptr, ///< [in] pointer to start of region to unmap. size_t size ///< [in] size in bytes to unmap; must be page aligned. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Set memory access attributes for a virtual address range. /// /// @details /// - This function may be called from simultaneous threads with the same /// function handle. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == ptr` /// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION /// + `::ZE_MEMORY_ACCESS_ATTRIBUTE_READONLY < access` /// - ::ZE_RESULT_ERROR_UNSUPPORTED_ALIGNMENT /// + Address must be page aligned /// - ::ZE_RESULT_ERROR_UNSUPPORTED_SIZE /// + `0 == size` /// + Size must be page aligned ZE_APIEXPORT ze_result_t ZE_APICALL zeVirtualMemSetAccessAttribute( ze_context_handle_t hContext, ///< [in] handle of the context object const void* ptr, ///< [in] pointer to start of reserved virtual address region. size_t size, ///< [in] size in bytes; must be page aligned. 
ze_memory_access_attribute_t access ///< [in] specifies page access attributes to apply to the virtual address ///< range. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Get memory access attribute for a virtual address range. /// /// @details /// - If size and outSize are equal then the pages in the specified virtual /// address range have the same access attributes. /// - This function may be called from simultaneous threads with the same /// function handle. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == ptr` /// + `nullptr == access` /// + `nullptr == outSize` /// - ::ZE_RESULT_ERROR_UNSUPPORTED_ALIGNMENT /// + Address must be page aligned /// - ::ZE_RESULT_ERROR_UNSUPPORTED_SIZE /// + `0 == size` /// + Size must be page aligned ZE_APIEXPORT ze_result_t ZE_APICALL zeVirtualMemGetAccessAttribute( ze_context_handle_t hContext, ///< [in] handle of the context object const void* ptr, ///< [in] pointer to start of virtual address region for query. size_t size, ///< [in] size in bytes; must be page aligned. ze_memory_access_attribute_t* access, ///< [out] query result for page access attribute. size_t* outSize ///< [out] query result for size of virtual address range, starting at ptr, ///< that shares same access attribute. 
); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension APIs for Floating-Point Atomics #if !defined(__GNUC__) #pragma region floatAtomics #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_FLOAT_ATOMICS_EXT_NAME /// @brief Floating-Point Atomics Extension Name #define ZE_FLOAT_ATOMICS_EXT_NAME "ZE_extension_float_atomics" #endif // ZE_FLOAT_ATOMICS_EXT_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Floating-Point Atomics Extension Version(s) typedef enum _ze_float_atomics_ext_version_t { ZE_FLOAT_ATOMICS_EXT_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_FLOAT_ATOMICS_EXT_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_FLOAT_ATOMICS_EXT_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_FLOAT_ATOMICS_EXT_VERSION_* ENUMs } ze_float_atomics_ext_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Supported floating-point atomic capability flags typedef uint32_t ze_device_fp_atomic_ext_flags_t; typedef enum _ze_device_fp_atomic_ext_flag_t { ZE_DEVICE_FP_ATOMIC_EXT_FLAG_GLOBAL_LOAD_STORE = ZE_BIT(0), ///< Supports atomic load, store, and exchange ZE_DEVICE_FP_ATOMIC_EXT_FLAG_GLOBAL_ADD = ZE_BIT(1), ///< Supports atomic add and subtract ZE_DEVICE_FP_ATOMIC_EXT_FLAG_GLOBAL_MIN_MAX = ZE_BIT(2), ///< Supports atomic min and max ZE_DEVICE_FP_ATOMIC_EXT_FLAG_LOCAL_LOAD_STORE = ZE_BIT(16), ///< Supports atomic load, store, and exchange ZE_DEVICE_FP_ATOMIC_EXT_FLAG_LOCAL_ADD = ZE_BIT(17), ///< Supports atomic add and subtract ZE_DEVICE_FP_ATOMIC_EXT_FLAG_LOCAL_MIN_MAX = ZE_BIT(18), ///< Supports atomic min and max ZE_DEVICE_FP_ATOMIC_EXT_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_DEVICE_FP_ATOMIC_EXT_FLAG_* ENUMs } ze_device_fp_atomic_ext_flag_t; /////////////////////////////////////////////////////////////////////////////// /// 
@brief Device floating-point atomic properties queried using /// ::zeDeviceGetModuleProperties /// /// @details /// - This structure may be returned from ::zeDeviceGetModuleProperties, via /// the `pNext` member of ::ze_device_module_properties_t. typedef struct _ze_float_atomic_ext_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_device_fp_atomic_ext_flags_t fp16Flags; ///< [out] Capabilities for half-precision floating-point atomic operations ze_device_fp_atomic_ext_flags_t fp32Flags; ///< [out] Capabilities for single-precision floating-point atomic ///< operations ze_device_fp_atomic_ext_flags_t fp64Flags; ///< [out] Capabilities for double-precision floating-point atomic ///< operations } ze_float_atomic_ext_properties_t; #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension for supporting kernel global work offset. #if !defined(__GNUC__) #pragma region globaloffset #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_GLOBAL_OFFSET_EXP_NAME /// @brief Global Offset Extension Name #define ZE_GLOBAL_OFFSET_EXP_NAME "ZE_experimental_global_offset" #endif // ZE_GLOBAL_OFFSET_EXP_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Global Offset Extension Version(s) typedef enum _ze_global_offset_exp_version_t { ZE_GLOBAL_OFFSET_EXP_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_GLOBAL_OFFSET_EXP_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_GLOBAL_OFFSET_EXP_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_GLOBAL_OFFSET_EXP_VERSION_* ENUMs } ze_global_offset_exp_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Set global work offset for a kernel. 
/// /// @details /// - The global work offset will be used when a /// ::zeCommandListAppendLaunchKernel() variant is called. /// - The application must **not** call this function from simultaneous /// threads with the same kernel handle. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hKernel` ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelSetGlobalOffsetExp( ze_kernel_handle_t hKernel, ///< [in] handle of the kernel object uint32_t offsetX, ///< [in] global offset for X dimension to use for this kernel uint32_t offsetY, ///< [in] global offset for Y dimension to use for this kernel uint32_t offsetZ ///< [in] global offset for Z dimension to use for this kernel ); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension for supporting relaxed allocation limits. 
#if !defined(__GNUC__) #pragma region relaxedAllocLimits #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_RELAXED_ALLOCATION_LIMITS_EXP_NAME /// @brief Relaxed Allocation Limits Extension Name #define ZE_RELAXED_ALLOCATION_LIMITS_EXP_NAME "ZE_experimental_relaxed_allocation_limits" #endif // ZE_RELAXED_ALLOCATION_LIMITS_EXP_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Relaxed Allocation Limits Extension Version(s) typedef enum _ze_relaxed_allocation_limits_exp_version_t { ZE_RELAXED_ALLOCATION_LIMITS_EXP_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_RELAXED_ALLOCATION_LIMITS_EXP_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_RELAXED_ALLOCATION_LIMITS_EXP_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_RELAXED_ALLOCATION_LIMITS_EXP_VERSION_* ENUMs } ze_relaxed_allocation_limits_exp_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Supported relaxed memory allocation flags typedef uint32_t ze_relaxed_allocation_limits_exp_flags_t; typedef enum _ze_relaxed_allocation_limits_exp_flag_t { ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_MAX_SIZE = ZE_BIT(0), ///< Allocation size may exceed the `maxMemAllocSize` member of ///< ::ze_device_properties_t. ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_* ENUMs } ze_relaxed_allocation_limits_exp_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Relaxed limits memory allocation descriptor /// /// @details /// - This structure may be passed to ::zeMemAllocShared or /// ::zeMemAllocDevice, via the `pNext` member of /// ::ze_device_mem_alloc_desc_t. /// - This structure may also be passed to ::zeMemAllocHost, via the `pNext` /// member of ::ze_host_mem_alloc_desc_t. 
typedef struct _ze_relaxed_allocation_limits_exp_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_relaxed_allocation_limits_exp_flags_t flags; ///< [in] flags specifying allocation limits to relax. ///< must be 0 (default) or a valid combination of ::ze_relaxed_allocation_limits_exp_flag_t; } ze_relaxed_allocation_limits_exp_desc_t; #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension for retrieving kernel binary program data. #if !defined(__GNUC__) #pragma region kernelBinary #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_GET_KERNEL_BINARY_EXP_NAME /// @brief Get Kernel Binary Extension Name #define ZE_GET_KERNEL_BINARY_EXP_NAME "ZE_extension_kernel_binary_exp" #endif // ZE_GET_KERNEL_BINARY_EXP_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Get Kernel Binary Extension Version(s) typedef enum _ze_kernel_get_binary_exp_version_t { ZE_KERNEL_GET_BINARY_EXP_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_KERNEL_GET_BINARY_EXP_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_KERNEL_GET_BINARY_EXP_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_KERNEL_GET_BINARY_EXP_VERSION_* ENUMs } ze_kernel_get_binary_exp_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves kernel binary program data (ISA GEN format). /// /// @details /// - A valid kernel handle must be created with ::zeKernelCreate. /// - Returns Intel Graphics Assembly (GEN ISA) format binary program data /// for kernel handle. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hKernel` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pSize` /// + `nullptr == pKernelBinary` ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelGetBinaryExp( ze_kernel_handle_t hKernel, ///< [in] Kernel handle. size_t* pSize, ///< [in,out] pointer to variable with size of GEN ISA binary. uint8_t* pKernelBinary ///< [in,out] pointer to storage area for GEN ISA binary function. ); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension for Driver Direct Device Interface (DDI) Handles #if !defined(__GNUC__) #pragma region driverDDIHandles #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_DRIVER_DDI_HANDLES_EXT_NAME /// @brief Driver Direct Device Interface (DDI) Handles Extension Name #define ZE_DRIVER_DDI_HANDLES_EXT_NAME "ZE_extension_driver_ddi_handles" #endif // ZE_DRIVER_DDI_HANDLES_EXT_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Driver Direct Device Interface (DDI) Handles Extension Version(s) typedef enum _ze_driver_ddi_handles_ext_version_t { ZE_DRIVER_DDI_HANDLES_EXT_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_DRIVER_DDI_HANDLES_EXT_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_DRIVER_DDI_HANDLES_EXT_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_DRIVER_DDI_HANDLES_EXT_VERSION_* ENUMs } ze_driver_ddi_handles_ext_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Driver Direct Device Interface (DDI) Handle Extension Flags typedef uint32_t ze_driver_ddi_handle_ext_flags_t; typedef enum _ze_driver_ddi_handle_ext_flag_t { 
ZE_DRIVER_DDI_HANDLE_EXT_FLAG_DDI_HANDLE_EXT_SUPPORTED = ZE_BIT(0), ///< Driver Supports DDI Handles Extension ZE_DRIVER_DDI_HANDLE_EXT_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_DRIVER_DDI_HANDLE_EXT_FLAG_* ENUMs } ze_driver_ddi_handle_ext_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Driver DDI Handles properties queried using ::zeDriverGetProperties /// /// @details /// - This structure may be returned from ::zeDriverGetProperties, via the /// `pNext` member of ::ze_driver_properties_t. typedef struct _ze_driver_ddi_handles_ext_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_driver_ddi_handle_ext_flags_t flags; ///< [out] 0 (none) or a valid combination of ::ze_driver_ddi_handle_ext_flags_t } ze_driver_ddi_handles_ext_properties_t; #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension for external semaphores #if !defined(__GNUC__) #pragma region externalSemaphores #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_EXTERNAL_SEMAPHORES_EXTENSION_NAME /// @brief External Semaphores Extension Name #define ZE_EXTERNAL_SEMAPHORES_EXTENSION_NAME "ZE_extension_external_semaphores" #endif // ZE_EXTERNAL_SEMAPHORES_EXTENSION_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief External Semaphores Extension Version typedef enum _ze_external_semaphore_ext_version_t { ZE_EXTERNAL_SEMAPHORE_EXT_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_EXTERNAL_SEMAPHORE_EXT_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_EXTERNAL_SEMAPHORE_EXT_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_EXTERNAL_SEMAPHORE_EXT_VERSION_* ENUMs } ze_external_semaphore_ext_version_t; 
/////////////////////////////////////////////////////////////////////////////// /// @brief Handle of external semaphore object typedef struct _ze_external_semaphore_ext_handle_t *ze_external_semaphore_ext_handle_t; /////////////////////////////////////////////////////////////////////////////// /// @brief External Semaphores Type Flags typedef uint32_t ze_external_semaphore_ext_flags_t; typedef enum _ze_external_semaphore_ext_flag_t { ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_OPAQUE_FD = ZE_BIT(0), ///< Semaphore is an Linux opaque file descriptor ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_OPAQUE_WIN32 = ZE_BIT(1), ///< Semaphore is an opaque Win32 handle for monitored fence ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_OPAQUE_WIN32_KMT = ZE_BIT(2), ///< Semaphore is an opaque Win32 KMT handle for monitored fence ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_D3D12_FENCE = ZE_BIT(3), ///< Semaphore is a D3D12 fence ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_D3D11_FENCE = ZE_BIT(4), ///< Semaphore is a D3D11 fence ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_KEYED_MUTEX = ZE_BIT(5), ///< Semaphore is a keyed mutex for Win32 ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_KEYED_MUTEX_KMT = ZE_BIT(6), ///< Semaphore is a keyed mutex for Win32 KMT ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_VK_TIMELINE_SEMAPHORE_FD = ZE_BIT(7), ///< Semaphore is a Vulkan Timeline semaphore for Linux ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_VK_TIMELINE_SEMAPHORE_WIN32 = ZE_BIT(8), ///< Semaphore is a Vulkan Timeline semaphore for Win32 ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_* ENUMs } ze_external_semaphore_ext_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief External Semaphore Descriptor typedef struct _ze_external_semaphore_ext_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). 
ze_external_semaphore_ext_flags_t flags; ///< [in] The flags describing the type of the semaphore. ///< must be 0 (default) or a valid combination of ::ze_external_semaphore_ext_flag_t. ///< When importing a semaphore, pNext should be pointing to one of the ///< following structures: ::ze_external_semaphore_win32_ext_desc_t or ::ze_external_semaphore_fd_ext_desc_t. } ze_external_semaphore_ext_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief External Semaphore Win32 Descriptor typedef struct _ze_external_semaphore_win32_ext_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). void* handle; ///< [in] Win32 handle of the semaphore. ///< Must be a valid Win32 handle. const char* name; ///< [in] Name of the semaphore. ///< Must be a valid null-terminated string. } ze_external_semaphore_win32_ext_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief External Semaphore FD Descriptor typedef struct _ze_external_semaphore_fd_ext_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). int fd; ///< [in] File descriptor of the semaphore. ///< Must be a valid file descriptor. } ze_external_semaphore_fd_ext_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief External Semaphore Signal parameters typedef struct _ze_external_semaphore_signal_params_ext_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). uint64_t value; ///< [in] [optional] Value to signal. 
///< Specified by user as an expected value with some of semaphore types, ///< such as ::ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_D3D12_FENCE. } ze_external_semaphore_signal_params_ext_t; /////////////////////////////////////////////////////////////////////////////// /// @brief External Semaphore Wait parameters typedef struct _ze_external_semaphore_wait_params_ext_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). uint64_t value; ///< [in] [optional] Value to wait for. ///< Specified by user as an expected value with some of semaphore types, ///< such as ::ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_D3D12_FENCE. } ze_external_semaphore_wait_params_ext_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Import an external semaphore /// /// @details /// - Imports an external semaphore. /// - This function may be called from simultaneous threads with the same /// device handle. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == desc` /// + `nullptr == phSemaphore` /// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION /// + `0x1ff < desc->flags` ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceImportExternalSemaphoreExt( ze_device_handle_t hDevice, ///< [in] The device handle. const ze_external_semaphore_ext_desc_t* desc, ///< [in] The pointer to external semaphore descriptor. ze_external_semaphore_ext_handle_t* phSemaphore ///< [out] The handle of the external semaphore imported. 
); /////////////////////////////////////////////////////////////////////////////// /// @brief Release an external semaphore /// /// @details /// - The application must ensure the device is not currently referencing /// the semaphore before it is released. /// - The application must **not** call this function from simultaneous /// threads with the same semaphore handle. /// - The implementation of this function must be thread-safe. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hSemaphore` /// - ::ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceReleaseExternalSemaphoreExt( ze_external_semaphore_ext_handle_t hSemaphore ///< [in] The handle of the external semaphore. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Signal an external semaphore /// /// @details /// - Signals an external semaphore. /// - This function must only be used with an immediate command list. /// - This function may be called from simultaneous threads with the same /// command list handle. /// - The implementation of this function should be lock-free. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == phSemaphores` /// + `nullptr == signalParams` /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT /// - ::ZE_RESULT_ERROR_INVALID_SIZE /// + `(nullptr == phWaitEvents) && (0 < numWaitEvents)` /// + `(nullptr == phSemaphores) && (0 < numSemaphores)` /// + `(nullptr == signalParams) && (0 < numSemaphores)` /// - ::ZE_RESULT_ERROR_INVALID_ARGUMENT /// + Commandlist handle does not correspond to an immediate command list ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendSignalExternalSemaphoreExt( ze_command_list_handle_t hCommandList, ///< [in] The command list handle. uint32_t numSemaphores, ///< [in] The number of external semaphores. ze_external_semaphore_ext_handle_t* phSemaphores, ///< [in][range(0, numSemaphores)] The vector of external semaphore handles ///< to be appended into command list. ze_external_semaphore_signal_params_ext_t* signalParams, ///< [in] Signal parameters. ze_event_handle_t hSignalEvent, ///< [in][optional] handle of the event to signal on completion uint32_t numWaitEvents, ///< [in][optional] number of events to wait on before launching; must be 0 ///< if `nullptr == phWaitEvents` ze_event_handle_t* phWaitEvents ///< [in][optional][range(0, numWaitEvents)] handle of the events to wait ///< on before launching ); /////////////////////////////////////////////////////////////////////////////// /// @brief Wait on external semaphores /// /// @details /// - Waits on external semaphores. /// - This function must only be used with an immediate command list. /// - This function may be called from simultaneous threads with the same /// command list handle. 
/// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == phSemaphores` /// + `nullptr == waitParams` /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT /// - ::ZE_RESULT_ERROR_INVALID_SIZE /// + `(nullptr == phWaitEvents) && (0 < numWaitEvents)` /// + `(nullptr == phSemaphores) && (0 < numSemaphores)` /// + `(nullptr == waitParams) && (0 < numSemaphores)` /// - ::ZE_RESULT_ERROR_INVALID_ARGUMENT /// + Commandlist handle does not correspond to an immediate command list ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendWaitExternalSemaphoreExt( ze_command_list_handle_t hCommandList, ///< [in] The command list handle. uint32_t numSemaphores, ///< [in] The number of external semaphores. ze_external_semaphore_ext_handle_t* phSemaphores, ///< [in] [range(0,numSemaphores)] The vector of external semaphore handles ///< to append into command list. ze_external_semaphore_wait_params_ext_t* waitParams, ///< [in] Wait parameters. 
ze_event_handle_t hSignalEvent, ///< [in][optional] handle of the event to signal on completion uint32_t numWaitEvents, ///< [in][optional] number of events to wait on before launching; must be 0 ///< if `nullptr == phWaitEvents` ze_event_handle_t* phWaitEvents ///< [in][optional][range(0, numWaitEvents)] handle of the events to wait ///< on before launching ); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension APIs for CacheLine Size #if !defined(__GNUC__) #pragma region CacheLineSize #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_CACHELINE_SIZE_EXT_NAME /// @brief CacheLine Size Extension Name #define ZE_CACHELINE_SIZE_EXT_NAME "ZE_extension_device_cache_line_size" #endif // ZE_CACHELINE_SIZE_EXT_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief CacheLine Size Extension Version(s) typedef enum _ze_device_cache_line_size_ext_version_t { ZE_DEVICE_CACHE_LINE_SIZE_EXT_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_DEVICE_CACHE_LINE_SIZE_EXT_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ),///< latest known version ZE_DEVICE_CACHE_LINE_SIZE_EXT_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_DEVICE_CACHE_LINE_SIZE_EXT_VERSION_* ENUMs } ze_device_cache_line_size_ext_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief CacheLine Size queried using ::zeDeviceGetCacheProperties /// /// @details /// - This structure may be returned from ::zeDeviceGetCacheProperties via /// the `pNext` member of ::ze_device_cache_properties_t. /// - Used for determining the cache line size supported on a device. typedef struct _ze_device_cache_line_size_ext_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). 
size_t cacheLineSize; ///< [out] The cache line size in bytes. } ze_device_cache_line_size_ext_t; #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension for supporting ray tracing acceleration structure. #if !defined(__GNUC__) #pragma region RTAS #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_RTAS_EXT_NAME /// @brief Ray Tracing Acceleration Structure Extension Name #define ZE_RTAS_EXT_NAME "ZE_extension_rtas" #endif // ZE_RTAS_EXT_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Ray Tracing Acceleration Structure Builder Extension Version(s) typedef enum _ze_rtas_builder_ext_version_t { ZE_RTAS_BUILDER_EXT_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_RTAS_BUILDER_EXT_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_RTAS_BUILDER_EXT_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_RTAS_BUILDER_EXT_VERSION_* ENUMs } ze_rtas_builder_ext_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Ray tracing acceleration structure device flags typedef uint32_t ze_rtas_device_ext_flags_t; typedef enum _ze_rtas_device_ext_flag_t { ZE_RTAS_DEVICE_EXT_FLAG_RESERVED = ZE_BIT(0), ///< reserved for future use ZE_RTAS_DEVICE_EXT_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_RTAS_DEVICE_EXT_FLAG_* ENUMs } ze_rtas_device_ext_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Ray tracing acceleration structure format /// /// @details /// - This is an opaque ray tracing acceleration structure format /// identifier. 
typedef enum _ze_rtas_format_ext_t
{
    ZE_RTAS_FORMAT_EXT_INVALID = 0x0,               ///< Invalid acceleration structure format code
    ZE_RTAS_FORMAT_EXT_MAX = 0x7ffffffe,            ///< Maximum acceleration structure format code
    ZE_RTAS_FORMAT_EXT_FORCE_UINT32 = 0x7fffffff,   ///< Value marking end of ZE_RTAS_FORMAT_EXT_* ENUMs

} ze_rtas_format_ext_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Ray tracing acceleration structure builder flags
typedef uint32_t ze_rtas_builder_ext_flags_t;
typedef enum _ze_rtas_builder_ext_flag_t
{
    ZE_RTAS_BUILDER_EXT_FLAG_RESERVED = ZE_BIT(0),          ///< Reserved for future use
    ZE_RTAS_BUILDER_EXT_FLAG_FORCE_UINT32 = 0x7fffffff,     ///< Value marking end of ZE_RTAS_BUILDER_EXT_FLAG_* ENUMs

} ze_rtas_builder_ext_flag_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Ray tracing acceleration structure builder parallel operation flags
typedef uint32_t ze_rtas_parallel_operation_ext_flags_t;
typedef enum _ze_rtas_parallel_operation_ext_flag_t
{
    ZE_RTAS_PARALLEL_OPERATION_EXT_FLAG_RESERVED = ZE_BIT(0),       ///< Reserved for future use
    ZE_RTAS_PARALLEL_OPERATION_EXT_FLAG_FORCE_UINT32 = 0x7fffffff,  ///< Value marking end of ZE_RTAS_PARALLEL_OPERATION_EXT_FLAG_* ENUMs

} ze_rtas_parallel_operation_ext_flag_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Ray tracing acceleration structure builder geometry flags
typedef uint32_t ze_rtas_builder_geometry_ext_flags_t;
typedef enum _ze_rtas_builder_geometry_ext_flag_t
{
    ZE_RTAS_BUILDER_GEOMETRY_EXT_FLAG_NON_OPAQUE = ZE_BIT(0),       ///< non-opaque geometries invoke an any-hit shader
    ZE_RTAS_BUILDER_GEOMETRY_EXT_FLAG_FORCE_UINT32 = 0x7fffffff,    ///< Value marking end of ZE_RTAS_BUILDER_GEOMETRY_EXT_FLAG_* ENUMs

} ze_rtas_builder_geometry_ext_flag_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Packed ray tracing acceleration structure builder geometry flags (see
///        ::ze_rtas_builder_geometry_ext_flags_t)
typedef uint8_t ze_rtas_builder_packed_geometry_ext_flags_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Ray tracing acceleration structure builder instance flags
typedef uint32_t ze_rtas_builder_instance_ext_flags_t;
typedef enum _ze_rtas_builder_instance_ext_flag_t
{
    ZE_RTAS_BUILDER_INSTANCE_EXT_FLAG_TRIANGLE_CULL_DISABLE = ZE_BIT(0),            ///< disables culling of front-facing and back-facing triangles
    ZE_RTAS_BUILDER_INSTANCE_EXT_FLAG_TRIANGLE_FRONT_COUNTERCLOCKWISE = ZE_BIT(1),  ///< reverses front and back face of triangles
    ZE_RTAS_BUILDER_INSTANCE_EXT_FLAG_TRIANGLE_FORCE_OPAQUE = ZE_BIT(2),            ///< forces instanced geometry to be opaque, unless ray flag forces it to
                                                                                    ///< be non-opaque
    ZE_RTAS_BUILDER_INSTANCE_EXT_FLAG_TRIANGLE_FORCE_NON_OPAQUE = ZE_BIT(3),        ///< forces instanced geometry to be non-opaque, unless ray flag forces it
                                                                                    ///< to be opaque
    ZE_RTAS_BUILDER_INSTANCE_EXT_FLAG_FORCE_UINT32 = 0x7fffffff,                    ///< Value marking end of ZE_RTAS_BUILDER_INSTANCE_EXT_FLAG_* ENUMs

} ze_rtas_builder_instance_ext_flag_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Packed ray tracing acceleration structure builder instance flags (see
///        ::ze_rtas_builder_instance_ext_flags_t)
typedef uint8_t ze_rtas_builder_packed_instance_ext_flags_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Ray tracing acceleration structure builder build operation flags
///
/// @details
///     - These flags allow the application to tune the acceleration structure
///       build operation.
///     - The acceleration structure builder implementation might choose to use
///       spatial splitting to split large or long primitives into smaller
///       pieces. This may result in any-hit shaders being invoked multiple
///       times for non-opaque primitives, unless
///       ::ZE_RTAS_BUILDER_BUILD_OP_EXT_FLAG_NO_DUPLICATE_ANYHIT_INVOCATION is
///       specified.
///     - Usage of any of these flags may reduce ray tracing performance.
typedef uint32_t ze_rtas_builder_build_op_ext_flags_t;
typedef enum _ze_rtas_builder_build_op_ext_flag_t
{
    ZE_RTAS_BUILDER_BUILD_OP_EXT_FLAG_COMPACT = ZE_BIT(0),                          ///< build more compact acceleration structure
    ZE_RTAS_BUILDER_BUILD_OP_EXT_FLAG_NO_DUPLICATE_ANYHIT_INVOCATION = ZE_BIT(1),   ///< guarantees single any-hit shader invocation per primitive
    ZE_RTAS_BUILDER_BUILD_OP_EXT_FLAG_FORCE_UINT32 = 0x7fffffff,                    ///< Value marking end of ZE_RTAS_BUILDER_BUILD_OP_EXT_FLAG_* ENUMs

} ze_rtas_builder_build_op_ext_flag_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Ray tracing acceleration structure builder build quality hint
///
/// @details
///     - Depending on use case different quality modes for acceleration
///       structure build are supported.
///     - A low-quality build builds an acceleration structure fast, but at the
///       cost of some reduction in ray tracing performance. This mode is
///       recommended for dynamic content, such as animated characters.
///     - A medium-quality build uses a compromise between build quality and ray
///       tracing performance. This mode should be used by default.
///     - Higher ray tracing performance can be achieved by using a high-quality
///       build, but acceleration structure build performance might be
///       significantly reduced.
typedef enum _ze_rtas_builder_build_quality_hint_ext_t
{
    ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXT_LOW = 0,                 ///< build low-quality acceleration structure (fast)
    ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXT_MEDIUM = 1,              ///< build medium-quality acceleration structure (slower)
    ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXT_HIGH = 2,                ///< build high-quality acceleration structure (slow)
    ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXT_FORCE_UINT32 = 0x7fffffff,   ///< Value marking end of ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXT_* ENUMs

} ze_rtas_builder_build_quality_hint_ext_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Ray tracing acceleration structure builder geometry type
typedef enum _ze_rtas_builder_geometry_type_ext_t
{
    ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXT_TRIANGLES = 0,                ///< triangle mesh geometry type
    ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXT_QUADS = 1,                    ///< quad mesh geometry type
    ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXT_PROCEDURAL = 2,               ///< procedural geometry type
    ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXT_INSTANCE = 3,                 ///< instance geometry type
    ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXT_FORCE_UINT32 = 0x7fffffff,    ///< Value marking end of ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXT_* ENUMs

} ze_rtas_builder_geometry_type_ext_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Packed ray tracing acceleration structure builder geometry type (see
///        ::ze_rtas_builder_geometry_type_ext_t)
typedef uint8_t ze_rtas_builder_packed_geometry_type_ext_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Ray tracing acceleration structure data buffer element format
///
/// @details
///     - Specifies the format of data buffer elements.
///     - Data buffers may contain instancing transform matrices, triangle/quad
///       vertex indices, etc...
typedef enum _ze_rtas_builder_input_data_format_ext_t
{
    ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXT_FLOAT3 = 0,                       ///< 3-component float vector (see ::ze_rtas_float3_ext_t)
    ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXT_FLOAT3X4_COLUMN_MAJOR = 1,        ///< 3x4 affine transformation in column-major format (see
                                                                            ///< ::ze_rtas_transform_float3x4_column_major_ext_t)
    ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXT_FLOAT3X4_ALIGNED_COLUMN_MAJOR = 2,///< 3x4 affine transformation in column-major format (see
                                                                            ///< ::ze_rtas_transform_float3x4_aligned_column_major_ext_t)
    ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXT_FLOAT3X4_ROW_MAJOR = 3,           ///< 3x4 affine transformation in row-major format (see
                                                                            ///< ::ze_rtas_transform_float3x4_row_major_ext_t)
    ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXT_AABB = 4,                         ///< 3-dimensional axis-aligned bounding-box (see ::ze_rtas_aabb_ext_t)
    ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXT_TRIANGLE_INDICES_UINT32 = 5,      ///< Unsigned 32-bit triangle indices (see
                                                                            ///< ::ze_rtas_triangle_indices_uint32_ext_t)
    ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXT_QUAD_INDICES_UINT32 = 6,          ///< Unsigned 32-bit quad indices (see ::ze_rtas_quad_indices_uint32_ext_t)
    ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXT_FORCE_UINT32 = 0x7fffffff,        ///< Value marking end of ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXT_* ENUMs

} ze_rtas_builder_input_data_format_ext_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Packed ray tracing acceleration structure data buffer element format
///        (see ::ze_rtas_builder_input_data_format_ext_t)
typedef uint8_t ze_rtas_builder_packed_input_data_format_ext_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Handle of ray tracing acceleration structure builder object
typedef struct _ze_rtas_builder_ext_handle_t *ze_rtas_builder_ext_handle_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Handle of ray tracing acceleration structure builder parallel
///        operation object
typedef struct _ze_rtas_parallel_operation_ext_handle_t *ze_rtas_parallel_operation_ext_handle_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Ray tracing acceleration structure builder descriptor
typedef struct _ze_rtas_builder_ext_desc_t
{
    ze_structure_type_t stype;                      ///< [in] type of this structure
    const void* pNext;                              ///< [in][optional] must be null or a pointer to an extension-specific
                                                    ///< structure (i.e. contains stype and pNext).
    ze_rtas_builder_ext_version_t builderVersion;   ///< [in] ray tracing acceleration structure builder version

} ze_rtas_builder_ext_desc_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Ray tracing acceleration structure builder properties
typedef struct _ze_rtas_builder_ext_properties_t
{
    ze_structure_type_t stype;                  ///< [in] type of this structure
    void* pNext;                                ///< [in,out][optional] must be null or a pointer to an extension-specific
                                                ///< structure (i.e. contains stype and pNext).
    ze_rtas_builder_ext_flags_t flags;          ///< [out] ray tracing acceleration structure builder flags
    size_t rtasBufferSizeBytesExpected;         ///< [out] expected size (in bytes) required for acceleration structure buffer
                                                ///< - When using an acceleration structure buffer of this size, the
                                                ///< build is expected to succeed; however, it is possible that the build
                                                ///< may fail with ::ZE_RESULT_EXT_RTAS_BUILD_RETRY
    size_t rtasBufferSizeBytesMaxRequired;      ///< [out] worst-case size (in bytes) required for acceleration structure buffer
                                                ///< - When using an acceleration structure buffer of this size, the
                                                ///< build is guaranteed to not run out of memory.
    size_t scratchBufferSizeBytes;              ///< [out] scratch buffer size (in bytes) required for acceleration
                                                ///< structure build.

} ze_rtas_builder_ext_properties_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Ray tracing acceleration structure builder parallel operation
///        properties
typedef struct _ze_rtas_parallel_operation_ext_properties_t
{
    ze_structure_type_t stype;                      ///< [in] type of this structure
    void* pNext;                                    ///< [in,out][optional] must be null or a pointer to an extension-specific
                                                    ///< structure (i.e. contains stype and pNext).
    ze_rtas_parallel_operation_ext_flags_t flags;   ///< [out] ray tracing acceleration structure builder parallel operation
                                                    ///< flags
    uint32_t maxConcurrency;                        ///< [out] maximum number of threads that may join the parallel operation

} ze_rtas_parallel_operation_ext_properties_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Ray tracing acceleration structure device properties
///
/// @details
///     - This structure may be passed to ::zeDeviceGetProperties, via `pNext`
///       member of ::ze_device_properties_t.
///     - The implementation shall populate `format` with a value other than
///       ::ZE_RTAS_FORMAT_EXT_INVALID when the device supports ray tracing.
typedef struct _ze_rtas_device_ext_properties_t
{
    ze_structure_type_t stype;          ///< [in] type of this structure
    void* pNext;                        ///< [in,out][optional] must be null or a pointer to an extension-specific
                                        ///< structure (i.e. contains stype and pNext).
    ze_rtas_device_ext_flags_t flags;   ///< [out] ray tracing acceleration structure device flags
    ze_rtas_format_ext_t rtasFormat;    ///< [out] ray tracing acceleration structure format
    uint32_t rtasBufferAlignment;       ///< [out] required alignment of acceleration structure buffer

} ze_rtas_device_ext_properties_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief A 3-component vector type
typedef struct _ze_rtas_float3_ext_t
{
    float x;    ///< [in] x-coordinate of float3 vector
    float y;    ///< [in] y-coordinate of float3 vector
    float z;    ///< [in] z-coordinate of float3 vector

} ze_rtas_float3_ext_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief 3x4 affine transformation in column-major layout
///
/// @details
///     - A 3x4 affine transformation in column major layout, consisting of vectors
///          - vx=(vx_x, vx_y, vx_z),
///          - vy=(vy_x, vy_y, vy_z),
///          - vz=(vz_x, vz_y, vz_z), and
///          - p=(p_x, p_y, p_z)
///     - The transformation transforms a point (x, y, z) to: `x*vx + y*vy +
///       z*vz + p`.
typedef struct _ze_rtas_transform_float3x4_column_major_ext_t
{
    float vx_x; ///< [in] element 0 of column 0 of 3x4 matrix
    float vx_y; ///< [in] element 1 of column 0 of 3x4 matrix
    float vx_z; ///< [in] element 2 of column 0 of 3x4 matrix
    float vy_x; ///< [in] element 0 of column 1 of 3x4 matrix
    float vy_y; ///< [in] element 1 of column 1 of 3x4 matrix
    float vy_z; ///< [in] element 2 of column 1 of 3x4 matrix
    float vz_x; ///< [in] element 0 of column 2 of 3x4 matrix
    float vz_y; ///< [in] element 1 of column 2 of 3x4 matrix
    float vz_z; ///< [in] element 2 of column 2 of 3x4 matrix
    float p_x;  ///< [in] element 0 of column 3 of 3x4 matrix
    float p_y;  ///< [in] element 1 of column 3 of 3x4 matrix
    float p_z;  ///< [in] element 2 of column 3 of 3x4 matrix

} ze_rtas_transform_float3x4_column_major_ext_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief 3x4 affine transformation in column-major layout with aligned column
///        vectors
///
/// @details
///     - A 3x4 affine transformation in column major layout, consisting of vectors
///          - vx=(vx_x, vx_y, vx_z),
///          - vy=(vy_x, vy_y, vy_z),
///          - vz=(vz_x, vz_y, vz_z), and
///          - p=(p_x, p_y, p_z)
///     - The transformation transforms a point (x, y, z) to: `x*vx + y*vy +
///       z*vz + p`.
///     - The column vectors are aligned to 16-bytes and pad members are
///       ignored.
typedef struct _ze_rtas_transform_float3x4_aligned_column_major_ext_t
{
    float vx_x; ///< [in] element 0 of column 0 of 3x4 matrix
    float vx_y; ///< [in] element 1 of column 0 of 3x4 matrix
    float vx_z; ///< [in] element 2 of column 0 of 3x4 matrix
    float pad0; ///< [in] ignored padding
    float vy_x; ///< [in] element 0 of column 1 of 3x4 matrix
    float vy_y; ///< [in] element 1 of column 1 of 3x4 matrix
    float vy_z; ///< [in] element 2 of column 1 of 3x4 matrix
    float pad1; ///< [in] ignored padding
    float vz_x; ///< [in] element 0 of column 2 of 3x4 matrix
    float vz_y; ///< [in] element 1 of column 2 of 3x4 matrix
    float vz_z; ///< [in] element 2 of column 2 of 3x4 matrix
    float pad2; ///< [in] ignored padding
    float p_x;  ///< [in] element 0 of column 3 of 3x4 matrix
    float p_y;  ///< [in] element 1 of column 3 of 3x4 matrix
    float p_z;  ///< [in] element 2 of column 3 of 3x4 matrix
    float pad3; ///< [in] ignored padding

} ze_rtas_transform_float3x4_aligned_column_major_ext_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief 3x4 affine transformation in row-major layout
///
/// @details
///     - A 3x4 affine transformation in row-major layout, consisting of vectors
///          - vx=(vx_x, vx_y, vx_z),
///          - vy=(vy_x, vy_y, vy_z),
///          - vz=(vz_x, vz_y, vz_z), and
///          - p=(p_x, p_y, p_z)
///     - The transformation transforms a point (x, y, z) to: `x*vx + y*vy +
///       z*vz + p`.
typedef struct _ze_rtas_transform_float3x4_row_major_ext_t
{
    float vx_x; ///< [in] element 0 of row 0 of 3x4 matrix
    float vy_x; ///< [in] element 1 of row 0 of 3x4 matrix
    float vz_x; ///< [in] element 2 of row 0 of 3x4 matrix
    float p_x;  ///< [in] element 3 of row 0 of 3x4 matrix
    float vx_y; ///< [in] element 0 of row 1 of 3x4 matrix
    float vy_y; ///< [in] element 1 of row 1 of 3x4 matrix
    float vz_y; ///< [in] element 2 of row 1 of 3x4 matrix
    float p_y;  ///< [in] element 3 of row 1 of 3x4 matrix
    float vx_z; ///< [in] element 0 of row 2 of 3x4 matrix
    float vy_z; ///< [in] element 1 of row 2 of 3x4 matrix
    float vz_z; ///< [in] element 2 of row 2 of 3x4 matrix
    float p_z;  ///< [in] element 3 of row 2 of 3x4 matrix

} ze_rtas_transform_float3x4_row_major_ext_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief A 3-dimensional axis-aligned bounding-box with lower and upper bounds
///        in each dimension
typedef struct _ze_rtas_aabb_ext_t
{
    ze_rtas_float3_ext_t lower; ///< [in] lower bounds of AABB
    ze_rtas_float3_ext_t upper; ///< [in] upper bounds of AABB

} ze_rtas_aabb_ext_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Triangle represented using 3 vertex indices
///
/// @details
///     - Represents a triangle using 3 vertex indices that index into a vertex
///       array that needs to be provided together with the index array.
///     - The linear barycentric u/v parametrization of the triangle is defined as:
///          - (u=0, v=0) at v0,
///          - (u=1, v=0) at v1, and
///          - (u=0, v=1) at v2
typedef struct _ze_rtas_triangle_indices_uint32_ext_t
{
    uint32_t v0;    ///< [in] first index pointing to the first triangle vertex in vertex array
    uint32_t v1;    ///< [in] second index pointing to the second triangle vertex in vertex
                    ///< array
    uint32_t v2;    ///< [in] third index pointing to the third triangle vertex in vertex array

} ze_rtas_triangle_indices_uint32_ext_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Quad represented using 4 vertex indices
///
/// @details
///     - Represents a quad composed of 4 indices that index into a vertex array
///       that needs to be provided together with the index array.
///     - A quad is a triangle pair represented using 4 vertex indices v0, v1,
///       v2, v3.
///       The first triangle is made out of indices v0, v1, v3 and the second triangle
///       from indices v2, v3, v1. The piecewise linear barycentric u/v parametrization
///       of the quad is defined as:
///          - (u=0, v=0) at v0,
///          - (u=1, v=0) at v1,
///          - (u=0, v=1) at v3, and
///          - (u=1, v=1) at v2
///       This is achieved by correcting the u'/v' coordinates of the second
///       triangle by
///       *u = 1-u'* and *v = 1-v'*, yielding a piecewise linear parametrization.
typedef struct _ze_rtas_quad_indices_uint32_ext_t
{
    uint32_t v0;    ///< [in] first index pointing to the first quad vertex in vertex array
    uint32_t v1;    ///< [in] second index pointing to the second quad vertex in vertex array
    uint32_t v2;    ///< [in] third index pointing to the third quad vertex in vertex array
    uint32_t v3;    ///< [in] fourth index pointing to the fourth quad vertex in vertex array

} ze_rtas_quad_indices_uint32_ext_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Ray tracing acceleration structure builder geometry info
typedef struct _ze_rtas_builder_geometry_info_ext_t
{
    ze_rtas_builder_packed_geometry_type_ext_t geometryType;    ///< [in] geometry type

} ze_rtas_builder_geometry_info_ext_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Ray tracing acceleration structure builder triangle mesh geometry info
///
/// @details
///     - The linear barycentric u/v parametrization of the triangle is defined as:
///          - (u=0, v=0) at v0,
///          - (u=1, v=0) at v1, and
///          - (u=0, v=1) at v2
typedef struct _ze_rtas_builder_triangles_geometry_info_ext_t
{
    ze_rtas_builder_packed_geometry_type_ext_t geometryType;        ///< [in] geometry type, must be
                                                                    ///< ::ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXT_TRIANGLES
    ze_rtas_builder_packed_geometry_ext_flags_t geometryFlags;      ///< [in] 0 or some combination of ::ze_rtas_builder_geometry_ext_flag_t
                                                                    ///< bits representing the geometry flags for all primitives of this
                                                                    ///< geometry
    uint8_t geometryMask;                                           ///< [in] 8-bit geometry mask for ray masking
    ze_rtas_builder_packed_input_data_format_ext_t triangleFormat;  ///< [in] format of triangle buffer data, must be
                                                                    ///< ::ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXT_TRIANGLE_INDICES_UINT32
    ze_rtas_builder_packed_input_data_format_ext_t vertexFormat;    ///< [in] format of vertex buffer data, must be
                                                                    ///< ::ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXT_FLOAT3
    uint32_t triangleCount;                                         ///< [in] number of triangles in triangle buffer
    uint32_t vertexCount;                                           ///< [in] number of vertices in vertex buffer
    uint32_t triangleStride;                                        ///< [in] stride (in bytes) of triangles in triangle buffer
    uint32_t vertexStride;                                          ///< [in] stride (in bytes) of vertices in vertex buffer
    void* pTriangleBuffer;                                          ///< [in] pointer to array of triangle indices in specified format
    void* pVertexBuffer;                                            ///< [in] pointer to array of triangle vertices in specified format

} ze_rtas_builder_triangles_geometry_info_ext_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Ray tracing acceleration structure builder quad mesh geometry info
///
/// @details
///     - A quad is a triangle pair represented using 4 vertex indices v0, v1,
///       v2, v3.
///       The first triangle is made out of indices v0, v1, v3 and the second triangle
///       from indices v2, v3, v1. The piecewise linear barycentric u/v parametrization
///       of the quad is defined as:
///          - (u=0, v=0) at v0,
///          - (u=1, v=0) at v1,
///          - (u=0, v=1) at v3, and
///          - (u=1, v=1) at v2
///       This is achieved by correcting the u'/v' coordinates of the second
///       triangle by
///       *u = 1-u'* and *v = 1-v'*, yielding a piecewise linear parametrization.
typedef struct _ze_rtas_builder_quads_geometry_info_ext_t
{
    ze_rtas_builder_packed_geometry_type_ext_t geometryType;        ///< [in] geometry type, must be ::ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXT_QUADS
    ze_rtas_builder_packed_geometry_ext_flags_t geometryFlags;      ///< [in] 0 or some combination of ::ze_rtas_builder_geometry_ext_flag_t
                                                                    ///< bits representing the geometry flags for all primitives of this
                                                                    ///< geometry
    uint8_t geometryMask;                                           ///< [in] 8-bit geometry mask for ray masking
    ze_rtas_builder_packed_input_data_format_ext_t quadFormat;      ///< [in] format of quad buffer data, must be
                                                                    ///< ::ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXT_QUAD_INDICES_UINT32
    ze_rtas_builder_packed_input_data_format_ext_t vertexFormat;    ///< [in] format of vertex buffer data, must be
                                                                    ///< ::ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXT_FLOAT3
    uint32_t quadCount;                                             ///< [in] number of quads in quad buffer
    uint32_t vertexCount;                                           ///< [in] number of vertices in vertex buffer
    uint32_t quadStride;                                            ///< [in] stride (in bytes) of quads in quad buffer
    uint32_t vertexStride;                                          ///< [in] stride (in bytes) of vertices in vertex buffer
    void* pQuadBuffer;                                              ///< [in] pointer to array of quad indices in specified format
    void* pVertexBuffer;                                            ///< [in] pointer to array of quad vertices in specified format

} ze_rtas_builder_quads_geometry_info_ext_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief AABB callback function parameters
typedef struct _ze_rtas_geometry_aabbs_ext_cb_params_t
{
    ze_structure_type_t stype;      ///< [in] type of this structure
    void* pNext;                    ///< [in,out][optional] must be null or a pointer to an extension-specific
                                    ///< structure (i.e. contains stype and pNext).
    uint32_t primID;                ///< [in] first primitive to return bounds for
    uint32_t primIDCount;           ///< [in] number of primitives to return bounds for
    void* pGeomUserPtr;             ///< [in] pointer provided through geometry descriptor
    void* pBuildUserPtr;            ///< [in] pointer provided through ::zeRTASBuilderBuildExt function
    ze_rtas_aabb_ext_t* pBoundsOut; ///< [out] destination buffer to write AABB bounds to

} ze_rtas_geometry_aabbs_ext_cb_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Callback function pointer type to return AABBs for a range of
///        procedural primitives
typedef void (*ze_rtas_geometry_aabbs_cb_ext_t)(
        ze_rtas_geometry_aabbs_ext_cb_params_t* params  ///< [in] callback function parameters structure
    );

///////////////////////////////////////////////////////////////////////////////
/// @brief Ray tracing acceleration structure builder procedural primitives
///        geometry info
///
/// @details
///     - A host-side bounds callback function is invoked by the acceleration
///       structure builder to query the bounds of procedural primitives on
///       demand. The callback is passed some `pGeomUserPtr` that can point to
///       an application-side representation of the procedural primitives.
///       Further, a second `pBuildUserPtr`, which is set by a parameter to
///       ::zeRTASBuilderBuildExt, is passed to the callback. This allows the
///       build to change the bounds of the procedural geometry, for example, to
///       build a BVH only over a short time range to implement multi-segment
///       motion blur.
typedef struct _ze_rtas_builder_procedural_geometry_info_ext_t
{
    ze_rtas_builder_packed_geometry_type_ext_t geometryType;    ///< [in] geometry type, must be
                                                                ///< ::ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXT_PROCEDURAL
    ze_rtas_builder_packed_geometry_ext_flags_t geometryFlags;  ///< [in] 0 or some combination of ::ze_rtas_builder_geometry_ext_flag_t
                                                                ///< bits representing the geometry flags for all primitives of this
                                                                ///< geometry
    uint8_t geometryMask;                                       ///< [in] 8-bit geometry mask for ray masking
    uint8_t reserved;                                           ///< [in] reserved for future use
    uint32_t primCount;                                         ///< [in] number of primitives in geometry
    ze_rtas_geometry_aabbs_cb_ext_t pfnGetBoundsCb;             ///< [in] pointer to callback function to get the axis-aligned bounding-box
                                                                ///< for a range of primitives
    void* pGeomUserPtr;                                         ///< [in] user data pointer passed to callback

} ze_rtas_builder_procedural_geometry_info_ext_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Ray tracing acceleration structure builder instance geometry info
typedef struct _ze_rtas_builder_instance_geometry_info_ext_t
{
    ze_rtas_builder_packed_geometry_type_ext_t geometryType;        ///< [in] geometry type, must be
                                                                    ///< ::ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXT_INSTANCE
    ze_rtas_builder_packed_instance_ext_flags_t instanceFlags;      ///< [in] 0 or some combination of ::ze_rtas_builder_instance_ext_flag_t
                                                                    ///< bits representing the instance flags for all primitives of this
                                                                    ///< geometry
    uint8_t geometryMask;                                           ///< [in] 8-bit geometry mask for ray masking
    ze_rtas_builder_packed_input_data_format_ext_t transformFormat; ///< [in] format of the specified transformation
    uint32_t instanceUserID;                                        ///< [in] user-specified identifier for the instance
    void* pTransform;                                               ///< [in] object-to-world instance transformation in specified format
    ze_rtas_aabb_ext_t* pBounds;                                    ///< [in] object-space axis-aligned bounding-box of the instanced
                                                                    ///< acceleration structure
    void* pAccelerationStructure;                                   ///< [in] device pointer to acceleration structure to instantiate

} ze_rtas_builder_instance_geometry_info_ext_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Ray tracing acceleration structure builder build operation descriptor
typedef struct _ze_rtas_builder_build_op_ext_desc_t
{
    ze_structure_type_t stype;                                  ///< [in] type of this structure
    const void* pNext;                                          ///< [in][optional] must be null or a pointer to an extension-specific
                                                                ///< structure (i.e. contains stype and pNext).
    ze_rtas_format_ext_t rtasFormat;                            ///< [in] ray tracing acceleration structure format
    ze_rtas_builder_build_quality_hint_ext_t buildQuality;      ///< [in] acceleration structure build quality hint
    ze_rtas_builder_build_op_ext_flags_t buildFlags;            ///< [in] 0 or some combination of ::ze_rtas_builder_build_op_ext_flag_t
                                                                ///< flags
    const ze_rtas_builder_geometry_info_ext_t** ppGeometries;   ///< [in][optional][range(0, `numGeometries`)] NULL or a valid array of
                                                                ///< pointers to geometry infos
    uint32_t numGeometries;                                     ///< [in] number of geometries in geometry infos array, can be zero when
                                                                ///< `ppGeometries` is NULL

} ze_rtas_builder_build_op_ext_desc_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Creates a ray tracing acceleration structure builder object
///
/// @details
///     - The application may call this function from simultaneous threads.
///     - The implementation of this function must be thread-safe.
///     - The implementation must support ::ZE_extension_rtas extension.
///
/// @returns
///     - ::ZE_RESULT_SUCCESS
///     - ::ZE_RESULT_ERROR_UNINITIALIZED
///     - ::ZE_RESULT_ERROR_DEVICE_LOST
///     - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY
///     - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY
///     - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE
///         + `nullptr == hDriver`
///     - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER
///         + `nullptr == pDescriptor`
///         + `nullptr == phBuilder`
///     - ::ZE_RESULT_ERROR_INVALID_ENUMERATION
///         + `::ZE_RTAS_BUILDER_EXT_VERSION_CURRENT < pDescriptor->builderVersion`
ZE_APIEXPORT ze_result_t ZE_APICALL
zeRTASBuilderCreateExt(
    ze_driver_handle_t hDriver,                     ///< [in] handle of driver object
    const ze_rtas_builder_ext_desc_t* pDescriptor,  ///< [in] pointer to builder descriptor
    ze_rtas_builder_ext_handle_t* phBuilder         ///< [out] handle of builder object
    );

///////////////////////////////////////////////////////////////////////////////
/// @brief Retrieves ray tracing acceleration structure builder properties
///
/// @details
///     - The application may call this function from simultaneous threads.
///     - The implementation of this function must be thread-safe.
///
/// @returns
///     - ::ZE_RESULT_SUCCESS
///     - ::ZE_RESULT_ERROR_UNINITIALIZED
///     - ::ZE_RESULT_ERROR_DEVICE_LOST
///     - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY
///     - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY
///     - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE
///         + `nullptr == hBuilder`
///     - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER
///         + `nullptr == pBuildOpDescriptor`
///         + `nullptr == pProperties`
///     - ::ZE_RESULT_ERROR_INVALID_ENUMERATION
///         + `::ZE_RTAS_FORMAT_EXT_MAX < pBuildOpDescriptor->rtasFormat`
///         + `::ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXT_HIGH < pBuildOpDescriptor->buildQuality`
///         + `0x3 < pBuildOpDescriptor->buildFlags`
ZE_APIEXPORT ze_result_t ZE_APICALL
zeRTASBuilderGetBuildPropertiesExt(
    ze_rtas_builder_ext_handle_t hBuilder,                          ///< [in] handle of builder object
    const ze_rtas_builder_build_op_ext_desc_t* pBuildOpDescriptor,  ///< [in] pointer to build operation descriptor
    ze_rtas_builder_ext_properties_t* pProperties                   ///< [in,out] query result for builder properties
    );

///////////////////////////////////////////////////////////////////////////////
/// @brief Checks ray tracing acceleration structure format compatibility
///
/// @details
///     - The application may call this function from simultaneous threads.
///     - The implementation of this function must be thread-safe.
///
/// @returns
///     - ::ZE_RESULT_SUCCESS
///     - ::ZE_RESULT_ERROR_UNINITIALIZED
///     - ::ZE_RESULT_ERROR_DEVICE_LOST
///     - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY
///     - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY
///     - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE
///         + `nullptr == hDriver`
///     - ::ZE_RESULT_ERROR_INVALID_ENUMERATION
///         + `::ZE_RTAS_FORMAT_EXT_MAX < rtasFormatA`
///         + `::ZE_RTAS_FORMAT_EXT_MAX < rtasFormatB`
///     - ::ZE_RESULT_SUCCESS
///         + An acceleration structure built with `rtasFormatA` is compatible with devices that report `rtasFormatB`.
///     - ::ZE_RESULT_EXT_ERROR_OPERANDS_INCOMPATIBLE
///         + An acceleration structure built with `rtasFormatA` is **not** compatible with devices that report `rtasFormatB`.
ZE_APIEXPORT ze_result_t ZE_APICALL
zeDriverRTASFormatCompatibilityCheckExt(
    ze_driver_handle_t hDriver,         ///< [in] handle of driver object
    ze_rtas_format_ext_t rtasFormatA,   ///< [in] operand A
    ze_rtas_format_ext_t rtasFormatB    ///< [in] operand B
    );

///////////////////////////////////////////////////////////////////////////////
/// @brief Build ray tracing acceleration structure
///
/// @details
///     - This function builds an acceleration structure of the scene consisting
///       of the specified geometry information and writes the acceleration
///       structure to the provided destination buffer. All types of geometries
///       can get freely mixed inside a scene.
///     - Before an acceleration structure can be built, the user must allocate
///       the memory for the acceleration structure buffer and scratch buffer
///       using sizes queried with the ::zeRTASBuilderGetBuildPropertiesExt function.
///     - When using the "worst-case" size for the acceleration structure
///       buffer, the acceleration structure construction will never fail with ::ZE_RESULT_EXT_RTAS_BUILD_RETRY.
///     - When using the "expected" size for the acceleration structure buffer,
///       the acceleration structure construction may fail with
///       ::ZE_RESULT_EXT_RTAS_BUILD_RETRY. If this happens, the user may resize
///       their acceleration structure buffer using the returned
///       `*pRtasBufferSizeBytes` value, which will be updated with an improved
///       size estimate that will likely result in a successful build.
///     - The acceleration structure construction is run on the host and is
///       synchronous, thus after the function returns with a successful result,
///       the acceleration structure may be used.
///     - All provided data buffers must be host-accessible. The referenced
///       scene data (index- and vertex- buffers) have to be accessible from the
///       host, and will **not** be referenced by the built acceleration structure.
///     - The acceleration structure buffer is typically a host allocation that
///       is later manually copied to a device allocation. Alternatively one can
///       also use a shared USM allocation as acceleration structure buffer and
///       skip the copy.
///     - A successfully constructed acceleration structure is entirely
///       self-contained. There is no requirement for input data to persist
///       beyond build completion.
///     - A successfully constructed acceleration structure is non-copyable.
///     - Acceleration structure construction may be parallelized by passing a
///       valid handle to a parallel operation object and joining that parallel
///       operation using ::zeRTASParallelOperationJoinExt with user-provided
///       worker threads.
///     - A successfully constructed acceleration structure is generally
///       non-copyable. It can only get copied from host to device using the
///       special ::zeRTASBuilderCommandListAppendCopyExt function.
///     - **Additional Notes**
///          - "The geometry infos array, geometry infos, and scratch buffer must
///            all be standard host memory allocations."
///          - "A pointer to a geometry info can be a null pointer, in which case
///            the geometry is treated as empty."
///          - "If no parallel operation handle is provided, the build is run
///            sequentially on the current thread."
///          - "A parallel operation object may only be associated with a single
///            acceleration structure build at a time."
///
/// @returns
///     - ::ZE_RESULT_SUCCESS
///     - ::ZE_RESULT_ERROR_UNINITIALIZED
///     - ::ZE_RESULT_ERROR_DEVICE_LOST
///     - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY
///     - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY
///     - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE
///         + `nullptr == hBuilder`
///     - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER
///         + `nullptr == pBuildOpDescriptor`
///         + `nullptr == pScratchBuffer`
///         + `nullptr == pRtasBuffer`
///     - ::ZE_RESULT_ERROR_INVALID_ENUMERATION
///         + `::ZE_RTAS_FORMAT_EXT_MAX < pBuildOpDescriptor->rtasFormat`
///         + `::ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXT_HIGH < pBuildOpDescriptor->buildQuality`
///         + `0x3 < pBuildOpDescriptor->buildFlags`
///     - ::ZE_RESULT_EXT_RTAS_BUILD_DEFERRED
///         + Acceleration structure build completion is deferred to parallel operation join.
///     - ::ZE_RESULT_EXT_RTAS_BUILD_RETRY
///         + Acceleration structure build failed due to insufficient resources, retry the build operation with a larger acceleration structure buffer allocation.
///     - ::ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE
///         + Acceleration structure build failed due to parallel operation object participation in another build operation.
ZE_APIEXPORT ze_result_t ZE_APICALL
zeRTASBuilderBuildExt(
    ze_rtas_builder_ext_handle_t hBuilder,                          ///< [in] handle of builder object
    const ze_rtas_builder_build_op_ext_desc_t* pBuildOpDescriptor,  ///< [in] pointer to build operation descriptor
    void* pScratchBuffer,                                           ///< [in][range(0, `scratchBufferSizeBytes`)] scratch buffer to be used
                                                                    ///< during acceleration structure construction
    size_t scratchBufferSizeBytes,                                  ///< [in] size of scratch buffer, in bytes
    void* pRtasBuffer,                                              ///< [in] pointer to destination buffer
    size_t rtasBufferSizeBytes,                                     ///< [in] destination buffer size, in bytes
    ze_rtas_parallel_operation_ext_handle_t hParallelOperation,     ///< [in][optional] handle to parallel operation object
    void* pBuildUserPtr,                                            ///< [in][optional] pointer passed to callbacks
    ze_rtas_aabb_ext_t* pBounds,                                    ///< [in,out][optional] pointer to destination address for acceleration
                                                                    ///< structure bounds
    size_t* pRtasBufferSizeBytes                                    ///< [out][optional] updated acceleration structure size requirement, in
                                                                    ///< bytes
    );

///////////////////////////////////////////////////////////////////////////////
/// @brief Copies a ray tracing acceleration structure (RTAS) from host to device
///        memory.
///
/// @details
///     - The memory pointed to by srcptr must be host memory containing a valid
///       ray tracing acceleration structure.
///     - The number of bytes to copy must be larger or equal to the size of the
///       ray tracing acceleration structure.
///     - The application must ensure the memory pointed to by dstptr and srcptr
///       is accessible by the device on which the command list was created.
///     - The implementation must not access the memory pointed to by dstptr and
///       srcptr as they are free to be modified by either the Host or device up
///       until execution.
///     - The application must ensure the events are accessible by the device on
///       which the command list was created.
/// - The application must ensure the command list and events were created, /// and the memory was allocated, on the same context. /// - The application must **not** call this function from simultaneous /// threads with the same command list handle. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == dstptr` /// + `nullptr == srcptr` /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT /// - ::ZE_RESULT_ERROR_INVALID_SIZE /// + `(nullptr == phWaitEvents) && (0 < numWaitEvents)` ZE_APIEXPORT ze_result_t ZE_APICALL zeRTASBuilderCommandListAppendCopyExt( ze_command_list_handle_t hCommandList, ///< [in] handle of command list void* dstptr, ///< [in] pointer to destination in device memory to copy the ray tracing ///< acceleration structure to const void* srcptr, ///< [in] pointer to a valid source ray tracing acceleration structure in ///< host memory to copy from size_t size, ///< [in] size in bytes to copy ze_event_handle_t hSignalEvent, ///< [in][optional] handle of the event to signal on completion uint32_t numWaitEvents, ///< [in][optional] number of events to wait on before launching; must be 0 ///< if `nullptr == phWaitEvents` ze_event_handle_t* phWaitEvents ///< [in][optional][range(0, numWaitEvents)] handle of the events to wait ///< on before launching ); /////////////////////////////////////////////////////////////////////////////// /// @brief Destroys a ray tracing acceleration structure builder object /// /// @details /// - The implementation of this function may immediately release any /// internal Host and Device resources associated with this builder. 
/// - The application must **not** call this function from simultaneous /// threads with the same builder handle. /// - The implementation of this function must be thread-safe. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hBuilder` /// - ::ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE ZE_APIEXPORT ze_result_t ZE_APICALL zeRTASBuilderDestroyExt( ze_rtas_builder_ext_handle_t hBuilder ///< [in][release] handle of builder object to destroy ); /////////////////////////////////////////////////////////////////////////////// /// @brief Creates a ray tracing acceleration structure builder parallel /// operation object /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. /// - The implementation must support ::ZE_extension_rtas extension. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDriver` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == phParallelOperation` ZE_APIEXPORT ze_result_t ZE_APICALL zeRTASParallelOperationCreateExt( ze_driver_handle_t hDriver, ///< [in] handle of driver object ze_rtas_parallel_operation_ext_handle_t* phParallelOperation ///< [out] handle of parallel operation object ); /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves ray tracing acceleration structure builder parallel /// operation properties /// /// @details /// - The application must first bind the parallel operation object to a /// build operation before it may query the parallel operation properties. 
/// In other words, the application must first call /// ::zeRTASBuilderBuildExt with **hParallelOperation** before calling /// this function. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hParallelOperation` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pProperties` ZE_APIEXPORT ze_result_t ZE_APICALL zeRTASParallelOperationGetPropertiesExt( ze_rtas_parallel_operation_ext_handle_t hParallelOperation, ///< [in] handle of parallel operation object ze_rtas_parallel_operation_ext_properties_t* pProperties ///< [in,out] query result for parallel operation properties ); /////////////////////////////////////////////////////////////////////////////// /// @brief Joins a parallel build operation /// /// @details /// - All worker threads return the same error code for the parallel build /// operation upon build completion /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hParallelOperation` ZE_APIEXPORT ze_result_t ZE_APICALL zeRTASParallelOperationJoinExt( ze_rtas_parallel_operation_ext_handle_t hParallelOperation ///< [in] handle of parallel operation object ); /////////////////////////////////////////////////////////////////////////////// /// @brief Destroys a ray tracing acceleration structure builder parallel /// operation object /// /// @details /// - The implementation of this function may immediately release any /// internal Host and Device resources associated with this parallel /// 
operation. /// - The application must **not** call this function from simultaneous /// threads with the same parallel operation handle. /// - The implementation of this function must be thread-safe. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hParallelOperation` ZE_APIEXPORT ze_result_t ZE_APICALL zeRTASParallelOperationDestroyExt( ze_rtas_parallel_operation_ext_handle_t hParallelOperation ///< [in][release] handle of parallel operation object to destroy ); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension for Device Vector Sizes Query #if !defined(__GNUC__) #pragma region deviceVectorSizes #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_DEVICE_VECTOR_SIZES_EXT_NAME /// @brief Device Vector Sizes Query Extension Name #define ZE_DEVICE_VECTOR_SIZES_EXT_NAME "ZE_extension_device_vector_sizes" #endif // ZE_DEVICE_VECTOR_SIZES_EXT_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Device Vector Sizes Query Extension Version(s) typedef enum _ze_device_vector_sizes_ext_version_t { ZE_DEVICE_VECTOR_SIZES_EXT_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_DEVICE_VECTOR_SIZES_EXT_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_DEVICE_VECTOR_SIZES_EXT_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_DEVICE_VECTOR_SIZES_EXT_VERSION_* ENUMs } ze_device_vector_sizes_ext_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Device Vector Width Properties queried using /// $DeviceGetVectorWidthPropertiesExt typedef struct _ze_device_vector_width_properties_ext_t { ze_structure_type_t stype; ///< [in] type of this structure void* 
pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). uint32_t vector_width_size; ///< [out] The associated vector width size supported by the device. uint32_t preferred_vector_width_char; ///< [out] The preferred vector width size for char type supported by the device. uint32_t preferred_vector_width_short; ///< [out] The preferred vector width size for short type supported by the device. uint32_t preferred_vector_width_int; ///< [out] The preferred vector width size for int type supported by the device. uint32_t preferred_vector_width_long; ///< [out] The preferred vector width size for long type supported by the device. uint32_t preferred_vector_width_float; ///< [out] The preferred vector width size for float type supported by the device. uint32_t preferred_vector_width_double; ///< [out] The preferred vector width size for double type supported by the device. uint32_t preferred_vector_width_half; ///< [out] The preferred vector width size for half type supported by the device. uint32_t native_vector_width_char; ///< [out] The native vector width size for char type supported by the device. uint32_t native_vector_width_short; ///< [out] The native vector width size for short type supported by the device. uint32_t native_vector_width_int; ///< [out] The native vector width size for int type supported by the device. uint32_t native_vector_width_long; ///< [out] The native vector width size for long type supported by the device. uint32_t native_vector_width_float; ///< [out] The native vector width size for float type supported by the device. uint32_t native_vector_width_double; ///< [out] The native vector width size for double type supported by the device. uint32_t native_vector_width_half; ///< [out] The native vector width size for half type supported by the device. 
} ze_device_vector_width_properties_ext_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves the vector width properties of the device. /// /// @details /// - Properties are reported for each vector width supported by the device. /// - Multiple calls to this function will return properties in the same /// order. /// - The number of vector width properties is reported thru the pCount /// parameter which is updated by the driver given pCount == 0. /// - The application may provide a buffer that is larger than the number of /// properties, but the application must set pCount to the number of /// properties to retrieve. /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pCount` ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetVectorWidthPropertiesExt( ze_device_handle_t hDevice, ///< [in] handle of the device uint32_t* pCount, ///< [in,out] pointer to the number of vector width properties. ///< if count is zero, then the driver shall update the value with the ///< total number of vector width properties available. ///< if count is greater than the number of vector width properties ///< available, then the driver shall update the value with the correct ///< number of vector width properties available. ze_device_vector_width_properties_ext_t* pVectorWidthProperties ///< [in,out][optional][range(0, *pCount)] array of vector width properties. ///< if count is less than the number of properties available, then the ///< driver will return only the number requested. 
); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension APIs for Cache Reservation #if !defined(__GNUC__) #pragma region cacheReservation #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_CACHE_RESERVATION_EXT_NAME /// @brief Cache_Reservation Extension Name #define ZE_CACHE_RESERVATION_EXT_NAME "ZE_extension_cache_reservation" #endif // ZE_CACHE_RESERVATION_EXT_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Cache_Reservation Extension Version(s) typedef enum _ze_cache_reservation_ext_version_t { ZE_CACHE_RESERVATION_EXT_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_CACHE_RESERVATION_EXT_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_CACHE_RESERVATION_EXT_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_CACHE_RESERVATION_EXT_VERSION_* ENUMs } ze_cache_reservation_ext_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Cache Reservation Region typedef enum _ze_cache_ext_region_t { ZE_CACHE_EXT_REGION_ZE_CACHE_REGION_DEFAULT = 0, ///< [DEPRECATED] utilize driver default scheme. Use ///< ::ZE_CACHE_EXT_REGION_DEFAULT. ZE_CACHE_EXT_REGION_ZE_CACHE_RESERVE_REGION = 1, ///< [DEPRECATED] utilize reserved region. Use ///< ::ZE_CACHE_EXT_REGION_RESERVED. ZE_CACHE_EXT_REGION_ZE_CACHE_NON_RESERVED_REGION = 2, ///< [DEPRECATED] utilize non-reserverd region. Use ///< ::ZE_CACHE_EXT_REGION_NON_RESERVED. 
ZE_CACHE_EXT_REGION_DEFAULT = 0, ///< utilize driver default scheme ZE_CACHE_EXT_REGION_RESERVED = 1, ///< utilize reserved region ZE_CACHE_EXT_REGION_NON_RESERVED = 2, ///< utilize non-reserverd region ZE_CACHE_EXT_REGION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_CACHE_EXT_REGION_* ENUMs } ze_cache_ext_region_t; /////////////////////////////////////////////////////////////////////////////// /// @brief CacheReservation structure /// /// @details /// - This structure must be passed to ::zeDeviceGetCacheProperties via the /// `pNext` member of ::ze_device_cache_properties_t /// - Used for determining the max cache reservation allowed on device. Size /// of zero means no reservation available. typedef struct _ze_cache_reservation_ext_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). size_t maxCacheReservationSize; ///< [out] max cache reservation size } ze_cache_reservation_ext_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Reserve Cache on Device /// /// @details /// - The application may call this function but may not be successful as /// some other application may have reserve prior /// /// @remarks /// _Analogues_ /// - None /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDevice` ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceReserveCacheExt( ze_device_handle_t hDevice, ///< [in] handle of the device object size_t cacheLevel, ///< [in] cache level where application want to reserve. If zero, then the ///< driver shall default to last level of cache and attempt to reserve in ///< that cache. 
size_t cacheReservationSize ///< [in] value for reserving size, in bytes. If zero, then the driver ///< shall remove prior reservation ); /////////////////////////////////////////////////////////////////////////////// /// @brief Assign VA section to use reserved section /// /// @details /// - The application may call this function to assign VA to particular /// reservartion region /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == ptr` /// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION /// + `::ZE_CACHE_EXT_REGION_NON_RESERVED < cacheRegion` ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceSetCacheAdviceExt( ze_device_handle_t hDevice, ///< [in] handle of the device object void* ptr, ///< [in] memory pointer to query size_t regionSize, ///< [in] region size, in pages ze_cache_ext_region_t cacheRegion ///< [in] reservation region ); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension for supporting event query timestamps. 
#if !defined(__GNUC__) #pragma region eventquerytimestamps #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_EVENT_QUERY_TIMESTAMPS_EXP_NAME /// @brief Event Query Timestamps Extension Name #define ZE_EVENT_QUERY_TIMESTAMPS_EXP_NAME "ZE_experimental_event_query_timestamps" #endif // ZE_EVENT_QUERY_TIMESTAMPS_EXP_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Event Query Timestamps Extension Version(s) typedef enum _ze_event_query_timestamps_exp_version_t { ZE_EVENT_QUERY_TIMESTAMPS_EXP_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_EVENT_QUERY_TIMESTAMPS_EXP_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ),///< latest known version ZE_EVENT_QUERY_TIMESTAMPS_EXP_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_EVENT_QUERY_TIMESTAMPS_EXP_VERSION_* ENUMs } ze_event_query_timestamps_exp_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Query event timestamps for a device or sub-device. /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. /// - The implementation must support /// ::ZE_experimental_event_query_timestamps. /// - The implementation must return all timestamps for the specified event /// and device pair. /// - The implementation must return all timestamps for all sub-devices when /// device handle is parent device. /// - The implementation may return all timestamps for sub-devices when /// device handle is sub-device or may return 0 for count. 
/// /// @remarks /// _Analogues_ /// - None /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hEvent` /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pCount` ZE_APIEXPORT ze_result_t ZE_APICALL zeEventQueryTimestampsExp( ze_event_handle_t hEvent, ///< [in] handle of the event ze_device_handle_t hDevice, ///< [in] handle of the device to query uint32_t* pCount, ///< [in,out] pointer to the number of timestamp results. ///< if count is zero, then the driver shall update the value with the ///< total number of timestamps available. ///< if count is greater than the number of timestamps available, then the ///< driver shall update the value with the correct number of timestamps available. ze_kernel_timestamp_result_t* pTimestamps ///< [in,out][optional][range(0, *pCount)] array of timestamp results. ///< if count is less than the number of timestamps available, then driver ///< shall only retrieve that number of timestamps. ); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension for supporting image memory properties. 
#if !defined(__GNUC__) #pragma region imagememoryproperties #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_IMAGE_MEMORY_PROPERTIES_EXP_NAME /// @brief Image Memory Properties Extension Name #define ZE_IMAGE_MEMORY_PROPERTIES_EXP_NAME "ZE_experimental_image_memory_properties" #endif // ZE_IMAGE_MEMORY_PROPERTIES_EXP_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Image Memory Properties Extension Version(s) typedef enum _ze_image_memory_properties_exp_version_t { ZE_IMAGE_MEMORY_PROPERTIES_EXP_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_IMAGE_MEMORY_PROPERTIES_EXP_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_IMAGE_MEMORY_PROPERTIES_EXP_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_IMAGE_MEMORY_PROPERTIES_EXP_VERSION_* ENUMs } ze_image_memory_properties_exp_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Image memory properties typedef struct _ze_image_memory_properties_exp_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). uint64_t size; ///< [out] size of image allocation in bytes. uint64_t rowPitch; ///< [out] size of image row in bytes. uint64_t slicePitch; ///< [out] size of image slice in bytes. } ze_image_memory_properties_exp_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Query image memory properties. /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. /// - The implementation must support /// ::ZE_experimental_image_memory_properties extension. 
/// /// @remarks /// _Analogues_ /// - None /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hImage` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pMemoryProperties` ZE_APIEXPORT ze_result_t ZE_APICALL zeImageGetMemoryPropertiesExp( ze_image_handle_t hImage, ///< [in] handle of image object ze_image_memory_properties_exp_t* pMemoryProperties ///< [in,out] query result for image memory properties. ); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension for supporting image views. #if !defined(__GNUC__) #pragma region imageview #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_IMAGE_VIEW_EXT_NAME /// @brief Image View Extension Name #define ZE_IMAGE_VIEW_EXT_NAME "ZE_extension_image_view" #endif // ZE_IMAGE_VIEW_EXT_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Image View Extension Version(s) typedef enum _ze_image_view_ext_version_t { ZE_IMAGE_VIEW_EXT_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_IMAGE_VIEW_EXT_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_IMAGE_VIEW_EXT_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_IMAGE_VIEW_EXT_VERSION_* ENUMs } ze_image_view_ext_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Create image view on the context. /// /// @details /// - The application must only use the image view for the device, or its /// sub-devices, which was provided during creation. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. /// - The implementation must support ::ZE_extension_image_view extension. 
/// - Image views are treated as images from the API. /// - Image views provide a mechanism to redescribe how an image is /// interpreted (e.g. different format). /// - Image views become disabled when their corresponding image resource is /// destroyed. /// - Use ::zeImageDestroy to destroy image view objects. /// /// @remarks /// _Analogues_ /// - None /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// + `nullptr == hDevice` /// + `nullptr == hImage` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == desc` /// + `nullptr == phImageView` /// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION /// + `0x3 < desc->flags` /// + `::ZE_IMAGE_TYPE_BUFFER < desc->type` /// - ::ZE_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT ZE_APIEXPORT ze_result_t ZE_APICALL zeImageViewCreateExt( ze_context_handle_t hContext, ///< [in] handle of the context object ze_device_handle_t hDevice, ///< [in] handle of the device const ze_image_desc_t* desc, ///< [in] pointer to image descriptor ze_image_handle_t hImage, ///< [in] handle of image object to create view from ze_image_handle_t* phImageView ///< [out] pointer to handle of image object created for view ); /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_IMAGE_VIEW_EXP_NAME /// @brief Image View Extension Name #define ZE_IMAGE_VIEW_EXP_NAME "ZE_experimental_image_view" #endif // ZE_IMAGE_VIEW_EXP_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Image View Extension Version(s) typedef enum _ze_image_view_exp_version_t { ZE_IMAGE_VIEW_EXP_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_IMAGE_VIEW_EXP_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_IMAGE_VIEW_EXP_VERSION_FORCE_UINT32 = 0x7fffffff, 
///< Value marking end of ZE_IMAGE_VIEW_EXP_VERSION_* ENUMs } ze_image_view_exp_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Create image view on the context. /// /// @details /// - The application must only use the image view for the device, or its /// sub-devices, which was provided during creation. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. /// - The implementation must support ::ZE_experimental_image_view /// extension. /// - Image views are treated as images from the API. /// - Image views provide a mechanism to redescribe how an image is /// interpreted (e.g. different format). /// - Image views become disabled when their corresponding image resource is /// destroyed. /// - Use ::zeImageDestroy to destroy image view objects. /// - Note: This function is deprecated and replaced by /// ::zeImageViewCreateExt. /// /// @remarks /// _Analogues_ /// - None /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// + `nullptr == hDevice` /// + `nullptr == hImage` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == desc` /// + `nullptr == phImageView` /// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION /// + `0x3 < desc->flags` /// + `::ZE_IMAGE_TYPE_BUFFER < desc->type` /// - ::ZE_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT ZE_APIEXPORT ze_result_t ZE_APICALL zeImageViewCreateExp( ze_context_handle_t hContext, ///< [in] handle of the context object ze_device_handle_t hDevice, ///< [in] handle of the device const ze_image_desc_t* desc, ///< [in] pointer to image descriptor ze_image_handle_t hImage, ///< [in] handle of image object to create view from ze_image_handle_t* phImageView ///< [out] pointer to 
handle of image object created for view ); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension for supporting image views for planar images. #if !defined(__GNUC__) #pragma region imageviewplanar #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_IMAGE_VIEW_PLANAR_EXT_NAME /// @brief Image View Planar Extension Name #define ZE_IMAGE_VIEW_PLANAR_EXT_NAME "ZE_extension_image_view_planar" #endif // ZE_IMAGE_VIEW_PLANAR_EXT_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Image View Planar Extension Version(s) typedef enum _ze_image_view_planar_ext_version_t { ZE_IMAGE_VIEW_PLANAR_EXT_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_IMAGE_VIEW_PLANAR_EXT_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_IMAGE_VIEW_PLANAR_EXT_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_IMAGE_VIEW_PLANAR_EXT_VERSION_* ENUMs } ze_image_view_planar_ext_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Image view planar descriptor typedef struct _ze_image_view_planar_ext_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). uint32_t planeIndex; ///< [in] the 0-based plane index (e.g. 
NV12 is 0 = Y plane, 1 UV plane) } ze_image_view_planar_ext_desc_t; /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_IMAGE_VIEW_PLANAR_EXP_NAME /// @brief Image View Planar Extension Name #define ZE_IMAGE_VIEW_PLANAR_EXP_NAME "ZE_experimental_image_view_planar" #endif // ZE_IMAGE_VIEW_PLANAR_EXP_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Image View Planar Extension Version(s) typedef enum _ze_image_view_planar_exp_version_t { ZE_IMAGE_VIEW_PLANAR_EXP_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_IMAGE_VIEW_PLANAR_EXP_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_IMAGE_VIEW_PLANAR_EXP_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_IMAGE_VIEW_PLANAR_EXP_VERSION_* ENUMs } ze_image_view_planar_exp_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Image view planar descriptor typedef struct _ze_image_view_planar_exp_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). uint32_t planeIndex; ///< [DEPRECATED] no longer supported, use ///< ::ze_image_view_planar_ext_desc_t instead } ze_image_view_planar_exp_desc_t; #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension for specifying kernel scheduling hints. 
#if !defined(__GNUC__) #pragma region kernelSchedulingHints #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_KERNEL_SCHEDULING_HINTS_EXP_NAME /// @brief Kernel Scheduling Hints Extension Name #define ZE_KERNEL_SCHEDULING_HINTS_EXP_NAME "ZE_experimental_scheduling_hints" #endif // ZE_KERNEL_SCHEDULING_HINTS_EXP_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Kernel Scheduling Hints Extension Version(s) typedef enum _ze_scheduling_hints_exp_version_t { ZE_SCHEDULING_HINTS_EXP_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_SCHEDULING_HINTS_EXP_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_SCHEDULING_HINTS_EXP_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_SCHEDULING_HINTS_EXP_VERSION_* ENUMs } ze_scheduling_hints_exp_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Supported kernel scheduling hint flags typedef uint32_t ze_scheduling_hint_exp_flags_t; typedef enum _ze_scheduling_hint_exp_flag_t { ZE_SCHEDULING_HINT_EXP_FLAG_OLDEST_FIRST = ZE_BIT(0), ///< Hint that the kernel prefers oldest-first scheduling ZE_SCHEDULING_HINT_EXP_FLAG_ROUND_ROBIN = ZE_BIT(1), ///< Hint that the kernel prefers round-robin scheduling ZE_SCHEDULING_HINT_EXP_FLAG_STALL_BASED_ROUND_ROBIN = ZE_BIT(2), ///< Hint that the kernel prefers stall-based round-robin scheduling ZE_SCHEDULING_HINT_EXP_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_SCHEDULING_HINT_EXP_FLAG_* ENUMs } ze_scheduling_hint_exp_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Device kernel scheduling hint properties queried using /// ::zeDeviceGetModuleProperties /// /// @details /// - This structure may be returned from ::zeDeviceGetModuleProperties, via /// the `pNext` member of ::ze_device_module_properties_t. 
typedef struct _ze_scheduling_hint_exp_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_scheduling_hint_exp_flags_t schedulingHintFlags; ///< [out] Supported kernel scheduling hints. ///< May be 0 (none) or a valid combination of ::ze_scheduling_hint_exp_flag_t. } ze_scheduling_hint_exp_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Kernel scheduling hint descriptor /// /// @details /// - This structure may be passed to ::zeKernelSchedulingHintExp. typedef struct _ze_scheduling_hint_exp_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_scheduling_hint_exp_flags_t flags; ///< [in] flags specifying kernel scheduling hints. ///< must be 0 (default) or a valid combination of ::ze_scheduling_hint_exp_flag_t. } ze_scheduling_hint_exp_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Provide kernel scheduling hints that may improve performance /// /// @details /// - The scheduling hints may improve performance only and are not required /// for correctness. /// - If a specified scheduling hint is unsupported it will be silently /// ignored. /// - If two conflicting scheduling hints are specified there is no defined behavior; /// the hints may be ignored or one hint may be chosen arbitrarily. /// - The application must not call this function from simultaneous threads /// with the same kernel handle. /// - The implementation of this function should be lock-free. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hKernel` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pHint` /// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION /// + `0x7 < pHint->flags` ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelSchedulingHintExp( ze_kernel_handle_t hKernel, ///< [in] handle of the kernel object ze_scheduling_hint_exp_desc_t* pHint ///< [in] pointer to kernel scheduling hint descriptor ); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension APIs for One-Definition-Rule Linkage Types #if !defined(__GNUC__) #pragma region linkonceodr #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_LINKONCE_ODR_EXT_NAME /// @brief Linkonce ODR Extension Name #define ZE_LINKONCE_ODR_EXT_NAME "ZE_extension_linkonce_odr" #endif // ZE_LINKONCE_ODR_EXT_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Linkonce ODR Extension Version(s) typedef enum _ze_linkonce_odr_ext_version_t { ZE_LINKONCE_ODR_EXT_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_LINKONCE_ODR_EXT_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_LINKONCE_ODR_EXT_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_LINKONCE_ODR_EXT_VERSION_* ENUMs } ze_linkonce_odr_ext_version_t; #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension for supporting power saving hint. 
#if !defined(__GNUC__) #pragma region powersavinghint #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_CONTEXT_POWER_SAVING_HINT_EXP_NAME /// @brief Power Saving Hint Extension Name #define ZE_CONTEXT_POWER_SAVING_HINT_EXP_NAME "ZE_experimental_power_saving_hint" #endif // ZE_CONTEXT_POWER_SAVING_HINT_EXP_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Power Saving Hint Extension Version(s) typedef enum _ze_power_saving_hint_exp_version_t { ZE_POWER_SAVING_HINT_EXP_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_POWER_SAVING_HINT_EXP_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_POWER_SAVING_HINT_EXP_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_POWER_SAVING_HINT_EXP_VERSION_* ENUMs } ze_power_saving_hint_exp_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Supported device types typedef enum _ze_power_saving_hint_type_t { ZE_POWER_SAVING_HINT_TYPE_MIN = 0, ///< Minumum power savings. The device will make no attempt to save power ///< while executing work submitted to this context. ZE_POWER_SAVING_HINT_TYPE_MAX = 100, ///< Maximum power savings. The device will do everything to bring power to ///< a minimum while executing work submitted to this context. ZE_POWER_SAVING_HINT_TYPE_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_POWER_SAVING_HINT_TYPE_* ENUMs } ze_power_saving_hint_type_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Extended context descriptor containing power saving hint. typedef struct _ze_context_power_saving_hint_exp_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). uint32_t hint; ///< [in] power saving hint (default value = 0). 
This is value from [0,100] ///< and can use pre-defined settings from ::ze_power_saving_hint_type_t. } ze_context_power_saving_hint_exp_desc_t; #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension APIs for Subgroups #if !defined(__GNUC__) #pragma region subgroups #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_SUBGROUPS_EXT_NAME /// @brief Subgroups Extension Name #define ZE_SUBGROUPS_EXT_NAME "ZE_extension_subgroups" #endif // ZE_SUBGROUPS_EXT_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Subgroups Extension Version(s) typedef enum _ze_subgroup_ext_version_t { ZE_SUBGROUP_EXT_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_SUBGROUP_EXT_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_SUBGROUP_EXT_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_SUBGROUP_EXT_VERSION_* ENUMs } ze_subgroup_ext_version_t; #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension APIs for EU Count #if !defined(__GNUC__) #pragma region EUCount #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_EU_COUNT_EXT_NAME /// @brief EU Count Extension Name #define ZE_EU_COUNT_EXT_NAME "ZE_extension_eu_count" #endif // ZE_EU_COUNT_EXT_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief EU Count Extension Version(s) typedef enum _ze_eu_count_ext_version_t { ZE_EU_COUNT_EXT_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_EU_COUNT_EXT_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_EU_COUNT_EXT_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_EU_COUNT_EXT_VERSION_* ENUMs } ze_eu_count_ext_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief EU count queried using ::zeDeviceGetProperties /// /// @details /// 
- This structure may be returned from ::zeDeviceGetProperties via the /// `pNext` member of ::ze_device_properties_t. /// - Used for determining the total number of EUs available on device. typedef struct _ze_eu_count_ext_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). uint32_t numTotalEUs; ///< [out] Total number of EUs available } ze_eu_count_ext_t; #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension APIs for PCI Properties #if !defined(__GNUC__) #pragma region PCIProperties #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_PCI_PROPERTIES_EXT_NAME /// @brief PCI Properties Extension Name #define ZE_PCI_PROPERTIES_EXT_NAME "ZE_extension_pci_properties" #endif // ZE_PCI_PROPERTIES_EXT_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief PCI Properties Extension Version(s) typedef enum _ze_pci_properties_ext_version_t { ZE_PCI_PROPERTIES_EXT_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_PCI_PROPERTIES_EXT_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_PCI_PROPERTIES_EXT_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_PCI_PROPERTIES_EXT_VERSION_* ENUMs } ze_pci_properties_ext_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Device PCI address /// /// @details /// - This structure may be passed to ::zeDevicePciGetPropertiesExt as an /// attribute of ::ze_pci_ext_properties_t. /// - A PCI BDF address is the bus:device:function address of the device and /// is useful for locating the device in the PCI switch fabric. 
typedef struct _ze_pci_address_ext_t { uint32_t domain; ///< [out] PCI domain number uint32_t bus; ///< [out] PCI BDF bus number uint32_t device; ///< [out] PCI BDF device number uint32_t function; ///< [out] PCI BDF function number } ze_pci_address_ext_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Device PCI speed typedef struct _ze_pci_speed_ext_t { int32_t genVersion; ///< [out] The link generation. A value of -1 means that this property is ///< unknown. int32_t width; ///< [out] The number of lanes. A value of -1 means that this property is ///< unknown. int64_t maxBandwidth; ///< [out] The theoretical maximum bandwidth in bytes/sec (sum of all ///< lanes). A value of -1 means that this property is unknown. } ze_pci_speed_ext_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Static PCI properties typedef struct _ze_pci_ext_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_pci_address_ext_t address; ///< [out] The BDF address ze_pci_speed_ext_t maxSpeed; ///< [out] Fastest port configuration supported by the device (sum of all ///< lanes) } ze_pci_ext_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Get PCI properties - address, max speed /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. 
/// /// @remarks /// _Analogues_ /// - None /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pPciProperties` ZE_APIEXPORT ze_result_t ZE_APICALL zeDevicePciGetPropertiesExt( ze_device_handle_t hDevice, ///< [in] handle of the device object. ze_pci_ext_properties_t* pPciProperties ///< [in,out] returns the PCI properties of the device. ); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension APIs for sRGB #if !defined(__GNUC__) #pragma region SRGB #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_SRGB_EXT_NAME /// @brief sRGB Extension Name #define ZE_SRGB_EXT_NAME "ZE_extension_srgb" #endif // ZE_SRGB_EXT_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief sRGB Extension Version(s) typedef enum _ze_srgb_ext_version_t { ZE_SRGB_EXT_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_SRGB_EXT_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_SRGB_EXT_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_SRGB_EXT_VERSION_* ENUMs } ze_srgb_ext_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief sRGB image descriptor /// /// @details /// - This structure may be passed to ::zeImageCreate via the `pNext` member /// of ::ze_image_desc_t /// - Used for specifying that the image is in sRGB format. typedef struct _ze_srgb_ext_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_bool_t sRGB; ///< [in] Is sRGB. 
} ze_srgb_ext_desc_t; #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension APIs for Image Copy To/From Memory #if !defined(__GNUC__) #pragma region imageCopy #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_IMAGE_COPY_EXT_NAME /// @brief Image Copy Extension Name #define ZE_IMAGE_COPY_EXT_NAME "ZE_extension_image_copy" #endif // ZE_IMAGE_COPY_EXT_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Image Copy Extension Version(s) typedef enum _ze_image_copy_ext_version_t { ZE_IMAGE_COPY_EXT_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_IMAGE_COPY_EXT_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_IMAGE_COPY_EXT_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_IMAGE_COPY_EXT_VERSION_* ENUMs } ze_image_copy_ext_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Copies from an image to device or shared memory. /// /// @details /// - The application must ensure the memory pointed to by dstptr is /// accessible by the device on which the command list was created. /// - The implementation must not access the memory pointed to by dstptr as /// it is free to be modified by either the Host or device up until /// execution. /// - The application must ensure the image and events are accessible by the /// device on which the command list was created. /// - The application must ensure the image format descriptor for the source /// image is a single-planar format. /// - The application must ensure that the rowPitch is set to 0 if image is /// a 1D image. Otherwise the rowPitch must be greater than or equal to /// the element size in bytes x width. 
/// - If rowPitch is set to 0, the appropriate row pitch is calculated based /// on the size of each element in bytes multiplied by width /// - The application must ensure that the slicePitch is set to 0 if image /// is a 1D or 2D image. Otherwise this value must be greater than or /// equal to rowPitch x height. /// - If slicePitch is set to 0, the appropriate slice pitch is calculated /// based on the rowPitch x height. /// - The application must ensure the command list, image and events were /// created, and the memory was allocated, on the same context. /// - The application must **not** call this function from simultaneous /// threads with the same command list handle. /// - The implementation of this function should be lock-free. /// /// @remarks /// _Analogues_ /// - clEnqueueReadImage /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// + `nullptr == hSrcImage` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == dstptr` /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT /// - ::ZE_RESULT_ERROR_INVALID_SIZE /// + `(nullptr == phWaitEvents) && (0 < numWaitEvents)` ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendImageCopyToMemoryExt( ze_command_list_handle_t hCommandList, ///< [in] handle of command list void* dstptr, ///< [in] pointer to destination memory to copy to ze_image_handle_t hSrcImage, ///< [in] handle of source image to copy from const ze_image_region_t* pSrcRegion, ///< [in][optional] source region descriptor uint32_t destRowPitch, ///< [in] size in bytes of the 1D slice of the 2D region of a 2D or 3D ///< image or each image of a 1D or 2D image array being written uint32_t destSlicePitch, ///< [in] size in bytes of the 2D slice of the 3D region of a 3D image or ///< each image of a 1D or 2D image 
array being written ze_event_handle_t hSignalEvent, ///< [in][optional] handle of the event to signal on completion uint32_t numWaitEvents, ///< [in][optional] number of events to wait on before launching; must be 0 ///< if `nullptr == phWaitEvents` ze_event_handle_t* phWaitEvents ///< [in][optional][range(0, numWaitEvents)] handle of the events to wait ///< on before launching ); /////////////////////////////////////////////////////////////////////////////// /// @brief Copies to an image from device or shared memory. /// /// @details /// - The application must ensure the memory pointed to by srcptr is /// accessible by the device on which the command list was created. /// - The implementation must not access the memory pointed to by srcptr as /// it is free to be modified by either the Host or device up until /// execution. /// - The application must ensure the image and events are accessible by the /// device on which the command list was created. /// - The application must ensure the image format descriptor for the /// destination image is a single-planar format. /// - The application must ensure that the rowPitch is set to 0 if image is /// a 1D image. Otherwise the rowPitch must be greater than or equal to /// the element size in bytes x width. /// - If rowPitch is set to 0, the appropriate row pitch is calculated based /// on the size of each element in bytes multiplied by width /// - The application must ensure that the slicePitch is set to 0 if image /// is a 1D or 2D image. Otherwise this value must be greater than or /// equal to rowPitch x height. /// - If slicePitch is set to 0, the appropriate slice pitch is calculated /// based on the rowPitch x height. /// - The application must ensure the command list, image and events were /// created, and the memory was allocated, on the same context. /// - The application must **not** call this function from simultaneous /// threads with the same command list handle. 
/// - The implementation of this function should be lock-free. /// /// @remarks /// _Analogues_ /// - clEnqueueWriteImage /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// + `nullptr == hDstImage` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == srcptr` /// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT /// - ::ZE_RESULT_ERROR_INVALID_SIZE /// + `(nullptr == phWaitEvents) && (0 < numWaitEvents)` ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendImageCopyFromMemoryExt( ze_command_list_handle_t hCommandList, ///< [in] handle of command list ze_image_handle_t hDstImage, ///< [in] handle of destination image to copy to const void* srcptr, ///< [in] pointer to source memory to copy from const ze_image_region_t* pDstRegion, ///< [in][optional] destination region descriptor uint32_t srcRowPitch, ///< [in] size in bytes of the 1D slice of the 2D region of a 2D or 3D ///< image or each image of a 1D or 2D image array being read uint32_t srcSlicePitch, ///< [in] size in bytes of the 2D slice of the 3D region of a 3D image or ///< each image of a 1D or 2D image array being read ze_event_handle_t hSignalEvent, ///< [in][optional] handle of the event to signal on completion uint32_t numWaitEvents, ///< [in][optional] number of events to wait on before launching; must be 0 ///< if `nullptr == phWaitEvents` ze_event_handle_t* phWaitEvents ///< [in][optional][range(0, numWaitEvents)] handle of the events to wait ///< on before launching ); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension for Querying Image Allocation Properties. 
#if !defined(__GNUC__) #pragma region imageQueryAllocProperties #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_IMAGE_QUERY_ALLOC_PROPERTIES_EXT_NAME /// @brief Image Query Allocation Properties Extension Name #define ZE_IMAGE_QUERY_ALLOC_PROPERTIES_EXT_NAME "ZE_extension_image_query_alloc_properties" #endif // ZE_IMAGE_QUERY_ALLOC_PROPERTIES_EXT_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Image Query Allocation Properties Extension Version(s) typedef enum _ze_image_query_alloc_properties_ext_version_t { ZE_IMAGE_QUERY_ALLOC_PROPERTIES_EXT_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_IMAGE_QUERY_ALLOC_PROPERTIES_EXT_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_IMAGE_QUERY_ALLOC_PROPERTIES_EXT_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_IMAGE_QUERY_ALLOC_PROPERTIES_EXT_VERSION_* ENUMs } ze_image_query_alloc_properties_ext_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Image allocation properties queried using /// ::zeImageGetAllocPropertiesExt typedef struct _ze_image_allocation_ext_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). uint64_t id; ///< [out] identifier for this allocation } ze_image_allocation_ext_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves attributes of an image allocation /// /// @details /// - The application may call this function from simultaneous threads. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hContext` /// + `nullptr == hImage` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pImageAllocProperties` ZE_APIEXPORT ze_result_t ZE_APICALL zeImageGetAllocPropertiesExt( ze_context_handle_t hContext, ///< [in] handle of the context object ze_image_handle_t hImage, ///< [in] handle of image object to query ze_image_allocation_ext_properties_t* pImageAllocProperties ///< [in,out] query result for image allocation properties ); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension APIs for Linkage Inspection #if !defined(__GNUC__) #pragma region linkageInspection #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_LINKAGE_INSPECTION_EXT_NAME /// @brief Linkage Inspection Extension Name #define ZE_LINKAGE_INSPECTION_EXT_NAME "ZE_extension_linkage_inspection" #endif // ZE_LINKAGE_INSPECTION_EXT_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Linkage Inspection Extension Version(s) typedef enum _ze_linkage_inspection_ext_version_t { ZE_LINKAGE_INSPECTION_EXT_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_LINKAGE_INSPECTION_EXT_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_LINKAGE_INSPECTION_EXT_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_LINKAGE_INSPECTION_EXT_VERSION_* ENUMs } ze_linkage_inspection_ext_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Supported module linkage inspection flags typedef uint32_t ze_linkage_inspection_ext_flags_t; typedef enum _ze_linkage_inspection_ext_flag_t { ZE_LINKAGE_INSPECTION_EXT_FLAG_IMPORTS = ZE_BIT(0), ///< List all 
imports of modules ZE_LINKAGE_INSPECTION_EXT_FLAG_UNRESOLVABLE_IMPORTS = ZE_BIT(1), ///< List all imports of modules that do not have a corresponding export ZE_LINKAGE_INSPECTION_EXT_FLAG_EXPORTS = ZE_BIT(2), ///< List all exports of modules ZE_LINKAGE_INSPECTION_EXT_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_LINKAGE_INSPECTION_EXT_FLAG_* ENUMs } ze_linkage_inspection_ext_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Module linkage inspection descriptor /// /// @details /// - This structure may be passed to ::zeModuleInspectLinkageExt. typedef struct _ze_linkage_inspection_ext_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_linkage_inspection_ext_flags_t flags; ///< [in] flags specifying module linkage inspection. ///< must be 0 (default) or a valid combination of ::ze_linkage_inspection_ext_flag_t. } ze_linkage_inspection_ext_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief List Imports & Exports /// /// @details /// - List all the import & unresolveable import dependencies & exports of a /// set of modules /// /// @remarks /// _Analogues_ /// - None /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pInspectDesc` /// + `nullptr == phModules` /// + `nullptr == phLog` /// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION /// + `0x7 < pInspectDesc->flags` ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleInspectLinkageExt( ze_linkage_inspection_ext_desc_t* pInspectDesc, ///< [in] pointer to linkage inspection descriptor structure. 
uint32_t numModules, ///< [in] number of modules to be inspected pointed to by phModules. ze_module_handle_t* phModules, ///< [in][range(0, numModules)] pointer to an array of modules to be ///< inspected for import dependencies. ze_module_build_log_handle_t* phLog ///< [out] pointer to handle of linkage inspection log. Log object will ///< contain separate lists of imports, un-resolvable imports, and exports. ); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension for supporting memory compression hints. #if !defined(__GNUC__) #pragma region memoryCompressionHints #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_MEMORY_COMPRESSION_HINTS_EXT_NAME /// @brief Memory Compression Hints Extension Name #define ZE_MEMORY_COMPRESSION_HINTS_EXT_NAME "ZE_extension_memory_compression_hints" #endif // ZE_MEMORY_COMPRESSION_HINTS_EXT_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Memory Compression Hints Extension Version(s) typedef enum _ze_memory_compression_hints_ext_version_t { ZE_MEMORY_COMPRESSION_HINTS_EXT_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_MEMORY_COMPRESSION_HINTS_EXT_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_MEMORY_COMPRESSION_HINTS_EXT_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_MEMORY_COMPRESSION_HINTS_EXT_VERSION_* ENUMs } ze_memory_compression_hints_ext_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Supported memory compression hints flags typedef uint32_t ze_memory_compression_hints_ext_flags_t; typedef enum _ze_memory_compression_hints_ext_flag_t { ZE_MEMORY_COMPRESSION_HINTS_EXT_FLAG_COMPRESSED = ZE_BIT(0), ///< Hint Driver implementation to make allocation compressible ZE_MEMORY_COMPRESSION_HINTS_EXT_FLAG_UNCOMPRESSED = ZE_BIT(1), ///< Hint Driver implementation to make allocation not compressible 
ZE_MEMORY_COMPRESSION_HINTS_EXT_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_MEMORY_COMPRESSION_HINTS_EXT_FLAG_* ENUMs } ze_memory_compression_hints_ext_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Compression hints memory allocation descriptor /// /// @details /// - This structure may be passed to ::zeMemAllocShared or /// ::zeMemAllocDevice, via the `pNext` member of /// ::ze_device_mem_alloc_desc_t. /// - This structure may be passed to ::zeMemAllocHost, via the `pNext` /// member of ::ze_host_mem_alloc_desc_t. /// - This structure may be passed to ::zeImageCreate, via the `pNext` /// member of ::ze_image_desc_t. typedef struct _ze_memory_compression_hints_ext_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_memory_compression_hints_ext_flags_t flags; ///< [in] flags specifying if allocation should be compressible or not. 
///< Must be set to one of the ::ze_memory_compression_hints_ext_flag_t; } ze_memory_compression_hints_ext_desc_t; #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension APIs for Memory Free Policies #if !defined(__GNUC__) #pragma region memoryFreePolicies #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_MEMORY_FREE_POLICIES_EXT_NAME /// @brief Memory Free Policies Extension Name #define ZE_MEMORY_FREE_POLICIES_EXT_NAME "ZE_extension_memory_free_policies" #endif // ZE_MEMORY_FREE_POLICIES_EXT_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Memory Free Policies Extension Version(s) typedef enum _ze_memory_free_policies_ext_version_t { ZE_MEMORY_FREE_POLICIES_EXT_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_MEMORY_FREE_POLICIES_EXT_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_MEMORY_FREE_POLICIES_EXT_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_MEMORY_FREE_POLICIES_EXT_VERSION_* ENUMs } ze_memory_free_policies_ext_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Supported memory free policy capability flags typedef uint32_t ze_driver_memory_free_policy_ext_flags_t; typedef enum _ze_driver_memory_free_policy_ext_flag_t { ZE_DRIVER_MEMORY_FREE_POLICY_EXT_FLAG_BLOCKING_FREE = ZE_BIT(0), ///< Blocks until all commands using the memory are complete before ///< scheduling memory to be freed. Does not guarantee memory is freed upon ///< return, only that it is safe and is scheduled to be freed. Actual ///< freeing of memory is specific to user mode driver and kernel mode ///< driver implementation and may be done asynchronously. ZE_DRIVER_MEMORY_FREE_POLICY_EXT_FLAG_DEFER_FREE = ZE_BIT(1), ///< Immediately schedules the memory to be freed and returns without ///< blocking. 
Memory may be freed after all commands using the memory are ///< complete. Actual freeing of memory is specific to user mode driver and ///< kernel mode driver implementation and may be done asynchronously. ZE_DRIVER_MEMORY_FREE_POLICY_EXT_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_DRIVER_MEMORY_FREE_POLICY_EXT_FLAG_* ENUMs } ze_driver_memory_free_policy_ext_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Driver memory free properties queried using ::zeDriverGetProperties /// /// @details /// - All drivers must support an immediate free policy, which is the /// default free policy. /// - This structure may be returned from ::zeDriverGetProperties, via the /// `pNext` member of ::ze_driver_properties_t. typedef struct _ze_driver_memory_free_ext_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_driver_memory_free_policy_ext_flags_t freePolicies; ///< [out] Supported memory free policies. ///< must be 0 or a combination of ::ze_driver_memory_free_policy_ext_flag_t. } ze_driver_memory_free_ext_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Memory free descriptor with free policy typedef struct _ze_memory_free_ext_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_driver_memory_free_policy_ext_flags_t freePolicy; ///< [in] flags specifying the memory free policy. ///< must be 0 (default) or a supported ::ze_driver_memory_free_policy_ext_flag_t; ///< default behavior is to free immediately. 
} ze_memory_free_ext_desc_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Frees allocated host memory, device memory, or shared memory on the
///        context using the specified free policy.
///
/// @details
///     - Similar to zeMemFree, with added parameter to choose the free policy.
///     - Does not guarantee memory is freed upon return. See free policy
///       descriptions for details.
///     - The application must **not** call this function from simultaneous
///       threads with the same pointer.
///     - The implementation of this function must be thread-safe.
///
/// @returns
///     - ::ZE_RESULT_SUCCESS
///     - ::ZE_RESULT_ERROR_UNINITIALIZED
///     - ::ZE_RESULT_ERROR_DEVICE_LOST
///     - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY
///     - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY
///     - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE
///         + `nullptr == hContext`
///     - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER
///         + `nullptr == pMemFreeDesc`
///         + `nullptr == ptr`
///     - ::ZE_RESULT_ERROR_INVALID_ENUMERATION
///         + `0x3 < pMemFreeDesc->freePolicy`
ZE_APIEXPORT ze_result_t ZE_APICALL
zeMemFreeExt(
    ze_context_handle_t hContext,                   ///< [in] handle of the context object
    const ze_memory_free_ext_desc_t* pMemFreeDesc,  ///< [in] pointer to memory free descriptor
    void* ptr                                       ///< [in][release] pointer to memory to free
    );

#if !defined(__GNUC__)
#pragma endregion
#endif
// Intel 'oneAPI' Level-Zero Extension APIs for Bandwidth
#if !defined(__GNUC__)
#pragma region bandwidth
#endif
///////////////////////////////////////////////////////////////////////////////
#ifndef ZE_BANDWIDTH_PROPERTIES_EXP_NAME
/// @brief Bandwidth Extension Name
#define ZE_BANDWIDTH_PROPERTIES_EXP_NAME  "ZE_experimental_bandwidth_properties"
#endif // ZE_BANDWIDTH_PROPERTIES_EXP_NAME

///////////////////////////////////////////////////////////////////////////////
/// @brief P2P Bandwidth Properties
///
/// @details
///     - This structure may be passed to ::zeDeviceGetP2PProperties by having
///       the pNext member of
::ze_device_p2p_properties_t point at this struct. typedef struct _ze_device_p2p_bandwidth_exp_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). uint32_t logicalBandwidth; ///< [out] total logical design bandwidth for all links connecting the two ///< devices uint32_t physicalBandwidth; ///< [out] total physical design bandwidth for all links connecting the two ///< devices ze_bandwidth_unit_t bandwidthUnit; ///< [out] bandwidth unit uint32_t logicalLatency; ///< [out] average logical design latency for all links connecting the two ///< devices uint32_t physicalLatency; ///< [out] average physical design latency for all links connecting the two ///< devices ze_latency_unit_t latencyUnit; ///< [out] latency unit } ze_device_p2p_bandwidth_exp_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Copy Bandwidth Properties /// /// @details /// - This structure may be passed to /// ::zeDeviceGetCommandQueueGroupProperties by having the pNext member of /// ::ze_command_queue_group_properties_t point at this struct. typedef struct _ze_copy_bandwidth_exp_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). 
uint32_t copyBandwidth; ///< [out] design bandwidth supported by this engine type for copy ///< operations ze_bandwidth_unit_t copyBandwidthUnit; ///< [out] copy bandwidth unit } ze_copy_bandwidth_exp_properties_t; #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension APIs for Device Local Identifier (LUID) #if !defined(__GNUC__) #pragma region deviceLUID #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_DEVICE_LUID_EXT_NAME /// @brief Device Local Identifier (LUID) Extension Name #define ZE_DEVICE_LUID_EXT_NAME "ZE_extension_device_luid" #endif // ZE_DEVICE_LUID_EXT_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Device Local Identifier (LUID) Extension Version(s) typedef enum _ze_device_luid_ext_version_t { ZE_DEVICE_LUID_EXT_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_DEVICE_LUID_EXT_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_DEVICE_LUID_EXT_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_DEVICE_LUID_EXT_VERSION_* ENUMs } ze_device_luid_ext_version_t; /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_MAX_DEVICE_LUID_SIZE_EXT /// @brief Maximum device local identifier (LUID) size in bytes #define ZE_MAX_DEVICE_LUID_SIZE_EXT 8 #endif // ZE_MAX_DEVICE_LUID_SIZE_EXT /////////////////////////////////////////////////////////////////////////////// /// @brief Device local identifier (LUID) typedef struct _ze_device_luid_ext_t { uint8_t id[ZE_MAX_DEVICE_LUID_SIZE_EXT]; ///< [out] opaque data representing a device LUID } ze_device_luid_ext_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Device LUID properties queried using ::zeDeviceGetProperties /// /// @details /// - This structure may be returned from ::zeDeviceGetProperties, via the /// `pNext` member of ::ze_device_properties_t. 
typedef struct _ze_device_luid_ext_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_device_luid_ext_t luid; ///< [out] locally unique identifier (LUID). ///< The returned LUID can be cast to a LUID object and must be equal to ///< the locally ///< unique identifier of an IDXGIAdapter1 object that corresponds to the device. uint32_t nodeMask; ///< [out] node mask. ///< The returned node mask must contain exactly one bit. ///< If the device is running on an operating system that supports the ///< Direct3D 12 API ///< and the device corresponds to an individual device in a linked device ///< adapter, the ///< returned node mask identifies the Direct3D 12 node corresponding to ///< the device. ///< Otherwise, the returned node mask must be 1. } ze_device_luid_ext_properties_t; #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension APIs for Fabric Topology Discovery #if !defined(__GNUC__) #pragma region fabric #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_FABRIC_EXP_NAME /// @brief Fabric Topology Discovery Extension Name #define ZE_FABRIC_EXP_NAME "ZE_experimental_fabric" #endif // ZE_FABRIC_EXP_NAME /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_MAX_FABRIC_EDGE_MODEL_EXP_SIZE /// @brief Maximum fabric edge model string size #define ZE_MAX_FABRIC_EDGE_MODEL_EXP_SIZE 256 #endif // ZE_MAX_FABRIC_EDGE_MODEL_EXP_SIZE /////////////////////////////////////////////////////////////////////////////// /// @brief Fabric Vertex types typedef enum _ze_fabric_vertex_exp_type_t { ZE_FABRIC_VERTEX_EXP_TYPE_UNKNOWN = 0, ///< Fabric vertex type is unknown ZE_FABRIC_VERTEX_EXP_TYPE_DEVICE = 1, ///< Fabric vertex represents a device ZE_FABRIC_VERTEX_EXP_TYPE_SUBDEVICE = 2, ///< Fabric vertex 
represents a subdevice ZE_FABRIC_VERTEX_EXP_TYPE_SWITCH = 3, ///< Fabric vertex represents a switch ZE_FABRIC_VERTEX_EXP_TYPE_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_FABRIC_VERTEX_EXP_TYPE_* ENUMs } ze_fabric_vertex_exp_type_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Fabric edge duplexity typedef enum _ze_fabric_edge_exp_duplexity_t { ZE_FABRIC_EDGE_EXP_DUPLEXITY_UNKNOWN = 0, ///< Fabric edge duplexity is unknown ZE_FABRIC_EDGE_EXP_DUPLEXITY_HALF_DUPLEX = 1, ///< Fabric edge is half duplex, i.e. stated bandwidth is obtained in only ///< one direction at time ZE_FABRIC_EDGE_EXP_DUPLEXITY_FULL_DUPLEX = 2, ///< Fabric edge is full duplex, i.e. stated bandwidth is supported in both ///< directions simultaneously ZE_FABRIC_EDGE_EXP_DUPLEXITY_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_FABRIC_EDGE_EXP_DUPLEXITY_* ENUMs } ze_fabric_edge_exp_duplexity_t; /////////////////////////////////////////////////////////////////////////////// /// @brief PCI address /// /// @details /// - A PCI BDF address is the bus:device:function address of the device and /// is useful for locating the device in the PCI switch fabric. typedef struct _ze_fabric_vertex_pci_exp_address_t { uint32_t domain; ///< [out] PCI domain number uint32_t bus; ///< [out] PCI BDF bus number uint32_t device; ///< [out] PCI BDF device number uint32_t function; ///< [out] PCI BDF function number } ze_fabric_vertex_pci_exp_address_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Fabric Vertex properties typedef struct _ze_fabric_vertex_exp_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_uuid_t uuid; ///< [out] universal unique identifier. 
///< If the vertex is co-located with a
///< device/subdevice, then this uuid will match that of the corresponding
///< device/subdevice
    ze_fabric_vertex_exp_type_t type;               ///< [out] does the fabric vertex represent a device, subdevice, or switch?
    ze_bool_t remote;                               ///< [out] does the fabric vertex live on the local node or on a remote
                                                    ///< node?
    ze_fabric_vertex_pci_exp_address_t address;     ///< [out] B/D/F address of fabric vertex & associated device/subdevice if
                                                    ///< available
} ze_fabric_vertex_exp_properties_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Fabric Edge properties
typedef struct _ze_fabric_edge_exp_properties_t
{
    ze_structure_type_t stype;                      ///< [in] type of this structure
    void* pNext;                                    ///< [in,out][optional] must be null or a pointer to an extension-specific
                                                    ///< structure (i.e. contains stype and pNext).
    ze_uuid_t uuid;                                 ///< [out] universal unique identifier.
    char model[ZE_MAX_FABRIC_EDGE_MODEL_EXP_SIZE];  ///< [out] Description of fabric edge technology. Will be set to the string
                                                    ///< "unkown" if this cannot be determined for this edge
                                                    ///< NOTE(review): "unkown" [sic] matches the upstream Level Zero spec's
                                                    ///< literal; drivers return that exact string, so do not "correct" the
                                                    ///< spelling here without confirming the runtime value changed upstream.
    uint32_t bandwidth;                             ///< [out] design bandwidth
    ze_bandwidth_unit_t bandwidthUnit;              ///< [out] bandwidth unit
    uint32_t latency;                               ///< [out] design latency
    ze_latency_unit_t latencyUnit;                  ///< [out] latency unit
    ze_fabric_edge_exp_duplexity_t duplexity;       ///< [out] Duplexity of the fabric edge
} ze_fabric_edge_exp_properties_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Retrieves fabric vertices within a driver
///
/// @details
///     - A fabric vertex represents either a device or a switch connected to
///       other fabric vertices.
///     - The application may call this function from simultaneous threads.
///     - The implementation of this function must be thread-safe.
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDriver` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pCount` ZE_APIEXPORT ze_result_t ZE_APICALL zeFabricVertexGetExp( ze_driver_handle_t hDriver, ///< [in] handle of the driver instance uint32_t* pCount, ///< [in,out] pointer to the number of fabric vertices. ///< if count is zero, then the driver shall update the value with the ///< total number of fabric vertices available. ///< if count is greater than the number of fabric vertices available, then ///< the driver shall update the value with the correct number of fabric ///< vertices available. ze_fabric_vertex_handle_t* phVertices ///< [in,out][optional][range(0, *pCount)] array of handle of fabric vertices. ///< if count is less than the number of fabric vertices available, then ///< driver shall only retrieve that number of fabric vertices. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves a fabric sub-vertex from a fabric vertex /// /// @details /// - Multiple calls to this function will return identical fabric vertex /// handles, in the same order. /// - The number of handles returned from this function is affected by the /// ::ZE_AFFINITY_MASK environment variable. /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hVertex` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pCount` ZE_APIEXPORT ze_result_t ZE_APICALL zeFabricVertexGetSubVerticesExp( ze_fabric_vertex_handle_t hVertex, ///< [in] handle of the fabric vertex object uint32_t* pCount, ///< [in,out] pointer to the number of sub-vertices. ///< if count is zero, then the driver shall update the value with the ///< total number of sub-vertices available. ///< if count is greater than the number of sub-vertices available, then ///< the driver shall update the value with the correct number of ///< sub-vertices available. ze_fabric_vertex_handle_t* phSubvertices ///< [in,out][optional][range(0, *pCount)] array of handle of sub-vertices. ///< if count is less than the number of sub-vertices available, then ///< driver shall only retrieve that number of sub-vertices. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves properties of the fabric vertex. /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hVertex` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pVertexProperties` ZE_APIEXPORT ze_result_t ZE_APICALL zeFabricVertexGetPropertiesExp( ze_fabric_vertex_handle_t hVertex, ///< [in] handle of the fabric vertex ze_fabric_vertex_exp_properties_t* pVertexProperties ///< [in,out] query result for fabric vertex properties ); /////////////////////////////////////////////////////////////////////////////// /// @brief Returns device handle from fabric vertex handle. /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hVertex` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == phDevice` /// - ::ZE_RESULT_EXP_ERROR_VERTEX_IS_NOT_DEVICE /// + Provided fabric vertex handle does not correspond to a device or subdevice. /// - ::ZE_RESULT_EXP_ERROR_REMOTE_DEVICE /// + Provided fabric vertex handle corresponds to remote device or subdevice. ZE_APIEXPORT ze_result_t ZE_APICALL zeFabricVertexGetDeviceExp( ze_fabric_vertex_handle_t hVertex, ///< [in] handle of the fabric vertex ze_device_handle_t* phDevice ///< [out] device handle corresponding to fabric vertex ); /////////////////////////////////////////////////////////////////////////////// /// @brief Returns fabric vertex handle from device handle. /// /// @details /// - The application may call this function from simultaneous threads. 
/// - The implementation of this function should be lock-free. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == phVertex` /// - ::ZE_RESULT_EXP_ERROR_DEVICE_IS_NOT_VERTEX /// + Provided device handle does not correspond to a fabric vertex. ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetFabricVertexExp( ze_device_handle_t hDevice, ///< [in] handle of the device ze_fabric_vertex_handle_t* phVertex ///< [out] fabric vertex handle corresponding to device ); /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves all fabric edges between provided pair of fabric vertices /// /// @details /// - A fabric edge represents one or more physical links between two fabric /// vertices. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hVertexA` /// + `nullptr == hVertexB` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pCount` ZE_APIEXPORT ze_result_t ZE_APICALL zeFabricEdgeGetExp( ze_fabric_vertex_handle_t hVertexA, ///< [in] handle of first fabric vertex instance ze_fabric_vertex_handle_t hVertexB, ///< [in] handle of second fabric vertex instance uint32_t* pCount, ///< [in,out] pointer to the number of fabric edges. ///< if count is zero, then the driver shall update the value with the ///< total number of fabric edges available. 
///< if count is greater than the number of fabric edges available, then ///< the driver shall update the value with the correct number of fabric ///< edges available. ze_fabric_edge_handle_t* phEdges ///< [in,out][optional][range(0, *pCount)] array of handle of fabric edges. ///< if count is less than the number of fabric edges available, then ///< driver shall only retrieve that number of fabric edges. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves fabric vertices connected by a fabric edge /// /// @details /// - A fabric vertex represents either a device or a switch connected to /// other fabric vertices via a fabric edge. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hEdge` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == phVertexA` /// + `nullptr == phVertexB` ZE_APIEXPORT ze_result_t ZE_APICALL zeFabricEdgeGetVerticesExp( ze_fabric_edge_handle_t hEdge, ///< [in] handle of the fabric edge instance ze_fabric_vertex_handle_t* phVertexA, ///< [out] fabric vertex connected to one end of the given fabric edge. ze_fabric_vertex_handle_t* phVertexB ///< [out] fabric vertex connected to other end of the given fabric edge. ); /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves properties of the fabric edge. /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function should be lock-free. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hEdge` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pEdgeProperties` ZE_APIEXPORT ze_result_t ZE_APICALL zeFabricEdgeGetPropertiesExp( ze_fabric_edge_handle_t hEdge, ///< [in] handle of the fabric edge ze_fabric_edge_exp_properties_t* pEdgeProperties ///< [in,out] query result for fabric edge properties ); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension APIs for Device Memory Properties #if !defined(__GNUC__) #pragma region memoryProperties #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_DEVICE_MEMORY_PROPERTIES_EXT_NAME /// @brief Device Memory Properties Extension Name #define ZE_DEVICE_MEMORY_PROPERTIES_EXT_NAME "ZE_extension_device_memory_properties" #endif // ZE_DEVICE_MEMORY_PROPERTIES_EXT_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Device Memory Properties Extension Version(s) typedef enum _ze_device_memory_properties_ext_version_t { ZE_DEVICE_MEMORY_PROPERTIES_EXT_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_DEVICE_MEMORY_PROPERTIES_EXT_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_DEVICE_MEMORY_PROPERTIES_EXT_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_DEVICE_MEMORY_PROPERTIES_EXT_VERSION_* ENUMs } ze_device_memory_properties_ext_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Memory module types typedef enum _ze_device_memory_ext_type_t { ZE_DEVICE_MEMORY_EXT_TYPE_HBM = 0, ///< HBM memory ZE_DEVICE_MEMORY_EXT_TYPE_HBM2 = 1, ///< HBM2 memory ZE_DEVICE_MEMORY_EXT_TYPE_DDR = 2, ///< DDR memory ZE_DEVICE_MEMORY_EXT_TYPE_DDR2 = 3, 
///< DDR2 memory ZE_DEVICE_MEMORY_EXT_TYPE_DDR3 = 4, ///< DDR3 memory ZE_DEVICE_MEMORY_EXT_TYPE_DDR4 = 5, ///< DDR4 memory ZE_DEVICE_MEMORY_EXT_TYPE_DDR5 = 6, ///< DDR5 memory ZE_DEVICE_MEMORY_EXT_TYPE_LPDDR = 7, ///< LPDDR memory ZE_DEVICE_MEMORY_EXT_TYPE_LPDDR3 = 8, ///< LPDDR3 memory ZE_DEVICE_MEMORY_EXT_TYPE_LPDDR4 = 9, ///< LPDDR4 memory ZE_DEVICE_MEMORY_EXT_TYPE_LPDDR5 = 10, ///< LPDDR5 memory ZE_DEVICE_MEMORY_EXT_TYPE_SRAM = 11, ///< SRAM memory ZE_DEVICE_MEMORY_EXT_TYPE_L1 = 12, ///< L1 cache ZE_DEVICE_MEMORY_EXT_TYPE_L3 = 13, ///< L3 cache ZE_DEVICE_MEMORY_EXT_TYPE_GRF = 14, ///< Execution unit register file ZE_DEVICE_MEMORY_EXT_TYPE_SLM = 15, ///< Execution unit shared local memory ZE_DEVICE_MEMORY_EXT_TYPE_GDDR4 = 16, ///< GDDR4 memory ZE_DEVICE_MEMORY_EXT_TYPE_GDDR5 = 17, ///< GDDR5 memory ZE_DEVICE_MEMORY_EXT_TYPE_GDDR5X = 18, ///< GDDR5X memory ZE_DEVICE_MEMORY_EXT_TYPE_GDDR6 = 19, ///< GDDR6 memory ZE_DEVICE_MEMORY_EXT_TYPE_GDDR6X = 20, ///< GDDR6X memory ZE_DEVICE_MEMORY_EXT_TYPE_GDDR7 = 21, ///< GDDR7 memory ZE_DEVICE_MEMORY_EXT_TYPE_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_DEVICE_MEMORY_EXT_TYPE_* ENUMs } ze_device_memory_ext_type_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Memory properties /// /// @details /// - This structure may be returned from ::zeDeviceGetMemoryProperties via /// the `pNext` member of ::ze_device_memory_properties_t typedef struct _ze_device_memory_ext_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_device_memory_ext_type_t type; ///< [out] The memory type uint64_t physicalSize; ///< [out] Physical memory size in bytes. A value of 0 indicates that this ///< property is not known. However, a call to ::zesMemoryGetState() will ///< correctly return the total size of usable memory. 
uint32_t readBandwidth; ///< [out] Design bandwidth for reads uint32_t writeBandwidth; ///< [out] Design bandwidth for writes ze_bandwidth_unit_t bandwidthUnit; ///< [out] bandwidth unit } ze_device_memory_ext_properties_t; #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension APIs for Bfloat16 Conversions #if !defined(__GNUC__) #pragma region bfloat16conversions #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_BFLOAT16_CONVERSIONS_EXT_NAME /// @brief Bfloat16 Conversions Extension Name #define ZE_BFLOAT16_CONVERSIONS_EXT_NAME "ZE_extension_bfloat16_conversions" #endif // ZE_BFLOAT16_CONVERSIONS_EXT_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Bfloat16 Conversions Extension Version(s) typedef enum _ze_bfloat16_conversions_ext_version_t { ZE_BFLOAT16_CONVERSIONS_EXT_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_BFLOAT16_CONVERSIONS_EXT_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_BFLOAT16_CONVERSIONS_EXT_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_BFLOAT16_CONVERSIONS_EXT_VERSION_* ENUMs } ze_bfloat16_conversions_ext_version_t; #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension APIs for Device IP Version #if !defined(__GNUC__) #pragma region deviceipversion #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_DEVICE_IP_VERSION_EXT_NAME /// @brief Device IP Version Extension Name #define ZE_DEVICE_IP_VERSION_EXT_NAME "ZE_extension_device_ip_version" #endif // ZE_DEVICE_IP_VERSION_EXT_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Device IP Version Extension Version(s) typedef enum _ze_device_ip_version_version_t { ZE_DEVICE_IP_VERSION_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_DEVICE_IP_VERSION_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 
), ///< latest known version ZE_DEVICE_IP_VERSION_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_DEVICE_IP_VERSION_VERSION_* ENUMs } ze_device_ip_version_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Device IP version queried using ::zeDeviceGetProperties /// /// @details /// - This structure may be returned from ::zeDeviceGetProperties via the /// `pNext` member of ::ze_device_properties_t typedef struct _ze_device_ip_version_ext_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). uint32_t ipVersion; ///< [out] Device IP version. The meaning of the device IP version is ///< implementation-defined, but newer devices should have a higher ///< version than older devices. } ze_device_ip_version_ext_t; #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension for querying kernel max group size properties. 
#if !defined(__GNUC__) #pragma region kernelMaxGroupSizeProperties #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_KERNEL_MAX_GROUP_SIZE_PROPERTIES_EXT_NAME /// @brief Kernel Max Group Size Properties Extension Name #define ZE_KERNEL_MAX_GROUP_SIZE_PROPERTIES_EXT_NAME "ZE_extension_kernel_max_group_size_properties" #endif // ZE_KERNEL_MAX_GROUP_SIZE_PROPERTIES_EXT_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Kernel Max Group Size Properties Extension Version(s) typedef enum _ze_kernel_max_group_size_properties_ext_version_t { ZE_KERNEL_MAX_GROUP_SIZE_PROPERTIES_EXT_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_KERNEL_MAX_GROUP_SIZE_PROPERTIES_EXT_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_KERNEL_MAX_GROUP_SIZE_PROPERTIES_EXT_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_KERNEL_MAX_GROUP_SIZE_PROPERTIES_EXT_VERSION_* ENUMs } ze_kernel_max_group_size_properties_ext_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Additional kernel max group size properties /// /// @details /// - This structure may be passed to ::zeKernelGetProperties, via the /// `pNext` member of ::ze_kernel_properties_t, to query additional kernel /// max group size properties. typedef struct _ze_kernel_max_group_size_properties_ext_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). uint32_t maxGroupSize; ///< [out] maximum group size that can be used to execute the kernel. This ///< value may be less than or equal to the `maxTotalGroupSize` member of ///< ::ze_device_compute_properties_t. 
} ze_kernel_max_group_size_properties_ext_t; /////////////////////////////////////////////////////////////////////////////// /// @brief compiler-independent type typedef ze_kernel_max_group_size_properties_ext_t ze_kernel_max_group_size_ext_properties_t; #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension for querying sub-allocations properties. #if !defined(__GNUC__) #pragma region subAllocationsProperties #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_SUB_ALLOCATIONS_EXP_NAME /// @brief Sub-Allocations Properties Extension Name #define ZE_SUB_ALLOCATIONS_EXP_NAME "ZE_experimental_sub_allocations" #endif // ZE_SUB_ALLOCATIONS_EXP_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Sub-Allocations Properties Extension Version(s) typedef enum _ze_sub_allocations_exp_version_t { ZE_SUB_ALLOCATIONS_EXP_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_SUB_ALLOCATIONS_EXP_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_SUB_ALLOCATIONS_EXP_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_SUB_ALLOCATIONS_EXP_VERSION_* ENUMs } ze_sub_allocations_exp_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Properties returned for a sub-allocation typedef struct _ze_sub_allocation_t { void* base; ///< [in,out][optional] base address of the sub-allocation size_t size; ///< [in,out][optional] size of the allocation } ze_sub_allocation_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Sub-Allocations Properties /// /// @details /// - This structure may be passed to ::zeMemGetAllocProperties, via the /// `pNext` member of ::ze_memory_allocation_properties_t. 
typedef struct _ze_memory_sub_allocations_exp_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). uint32_t* pCount; ///< [in,out] pointer to the number of sub-allocations. ///< if count is zero, then the driver shall update the value with the ///< total number of sub-allocations on which the allocation has been divided. ///< if count is greater than the number of sub-allocations, then the ///< driver shall update the value with the correct number of sub-allocations. ze_sub_allocation_t* pSubAllocations; ///< [in,out][optional][range(0, *pCount)] array of properties for sub-allocations. ///< if count is less than the number of sub-allocations available, then ///< driver shall only retrieve properties for that number of sub-allocations. } ze_memory_sub_allocations_exp_properties_t; #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension for supporting the querying of synchronized event timestamps. 
#if !defined(__GNUC__) #pragma region eventQueryKernelTimestamps #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_NAME /// @brief Event Query Kernel Timestamps Extension Name #define ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_NAME "ZE_extension_event_query_kernel_timestamps" #endif // ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Event Query Kernel Timestamps Extension Version(s) typedef enum _ze_event_query_kernel_timestamps_ext_version_t { ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_VERSION_* ENUMs } ze_event_query_kernel_timestamps_ext_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Event query kernel timestamps flags typedef uint32_t ze_event_query_kernel_timestamps_ext_flags_t; typedef enum _ze_event_query_kernel_timestamps_ext_flag_t { ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_FLAG_KERNEL = ZE_BIT(0), ///< Kernel timestamp results ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_FLAG_SYNCHRONIZED = ZE_BIT(1), ///< Device event timestamps synchronized to the host time domain ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_FLAG_* ENUMs } ze_event_query_kernel_timestamps_ext_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Event query kernel timestamps properties /// /// @details /// - This structure may be returned from ::zeDeviceGetProperties, via the /// `pNext` member of ::ze_device_properties_t. 
typedef struct _ze_event_query_kernel_timestamps_ext_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_event_query_kernel_timestamps_ext_flags_t flags; ///< [out] 0 or some combination of ///< ::ze_event_query_kernel_timestamps_ext_flag_t flags } ze_event_query_kernel_timestamps_ext_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Kernel timestamp clock data synchronized to the host time domain typedef struct _ze_synchronized_timestamp_data_ext_t { uint64_t kernelStart; ///< [out] synchronized clock at start of kernel execution uint64_t kernelEnd; ///< [out] synchronized clock at end of kernel execution } ze_synchronized_timestamp_data_ext_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Synchronized kernel timestamp result typedef struct _ze_synchronized_timestamp_result_ext_t { ze_synchronized_timestamp_data_ext_t global; ///< [out] wall-clock data ze_synchronized_timestamp_data_ext_t context; ///< [out] context-active data; only includes clocks while device context ///< was actively executing. } ze_synchronized_timestamp_result_ext_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Event query kernel timestamps results properties typedef struct _ze_event_query_kernel_timestamps_results_ext_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). 
ze_kernel_timestamp_result_t* pKernelTimestampsBuffer; ///< [in,out][optional][range(0, *pCount)] pointer to destination buffer of ///< kernel timestamp results ze_synchronized_timestamp_result_ext_t* pSynchronizedTimestampsBuffer; ///< [in,out][optional][range(0, *pCount)] pointer to destination buffer of ///< synchronized timestamp results } ze_event_query_kernel_timestamps_results_ext_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Query an event's timestamp value on the host, with domain preference. /// /// @details /// - For collecting *only* kernel timestamps, the application must ensure /// the event was created from an event pool that was created using /// ::ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP flag. /// - For collecting synchronized timestamps, the application must ensure /// the event was created from an event pool that was created using /// ::ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP flag. Kernel timestamps /// are also available from this type of event pool, but there is a /// performance cost. /// - The destination memory will be unmodified if the event has not been /// signaled. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. /// - The implementation must support /// ::ZE_extension_event_query_kernel_timestamps. /// - The implementation must return all timestamps for the specified event /// and device pair. /// - The implementation must return all timestamps for all sub-devices when /// device handle is parent device. /// - The implementation may return all timestamps for sub-devices when /// device handle is sub-device or may return 0 for count. 
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hEvent` /// + `nullptr == hDevice` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pCount` ZE_APIEXPORT ze_result_t ZE_APICALL zeEventQueryKernelTimestampsExt( ze_event_handle_t hEvent, ///< [in] handle of the event ze_device_handle_t hDevice, ///< [in] handle of the device to query uint32_t* pCount, ///< [in,out] pointer to the number of event packets available. ///< - This value is implementation specific. ///< - if `*pCount` is zero, then the driver shall update the value with ///< the total number of event packets available. ///< - if `*pCount` is greater than the number of event packets ///< available, the driver shall update the value with the correct value. ///< - Buffer(s) for query results must be sized by the application to ///< accommodate a minimum of `*pCount` elements. ze_event_query_kernel_timestamps_results_ext_properties_t* pResults ///< [in,out][optional][range(0, *pCount)] pointer to event query ///< properties structure(s). ///< - This parameter may be null when `*pCount` is zero. ///< - if `*pCount` is less than the number of event packets available, ///< the driver may only update `*pCount` elements, starting at element zero. ///< - if `*pCount` is greater than the number of event packets ///< available, the driver may only update the valid elements. ); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension for supporting ray tracing acceleration structure builder. 
#if !defined(__GNUC__) #pragma region RTASBuilder #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_RTAS_BUILDER_EXP_NAME /// @brief Ray Tracing Acceleration Structure Builder Extension Name #define ZE_RTAS_BUILDER_EXP_NAME "ZE_experimental_rtas_builder" #endif // ZE_RTAS_BUILDER_EXP_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Ray Tracing Acceleration Structure Builder Extension Version(s) typedef enum _ze_rtas_builder_exp_version_t { ZE_RTAS_BUILDER_EXP_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_RTAS_BUILDER_EXP_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_RTAS_BUILDER_EXP_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_RTAS_BUILDER_EXP_VERSION_* ENUMs } ze_rtas_builder_exp_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Ray tracing acceleration structure device flags typedef uint32_t ze_rtas_device_exp_flags_t; typedef enum _ze_rtas_device_exp_flag_t { ZE_RTAS_DEVICE_EXP_FLAG_RESERVED = ZE_BIT(0), ///< reserved for future use ZE_RTAS_DEVICE_EXP_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_RTAS_DEVICE_EXP_FLAG_* ENUMs } ze_rtas_device_exp_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Ray tracing acceleration structure format /// /// @details /// - This is an opaque ray tracing acceleration structure format /// identifier. 
typedef enum _ze_rtas_format_exp_t { ZE_RTAS_FORMAT_EXP_INVALID = 0, ///< Invalid acceleration structure format ZE_RTAS_FORMAT_EXP_MAX = 0x7ffffffe, ///< Maximum acceleration structure format code ZE_RTAS_FORMAT_EXP_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_RTAS_FORMAT_EXP_* ENUMs } ze_rtas_format_exp_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Ray tracing acceleration structure builder flags typedef uint32_t ze_rtas_builder_exp_flags_t; typedef enum _ze_rtas_builder_exp_flag_t { ZE_RTAS_BUILDER_EXP_FLAG_RESERVED = ZE_BIT(0), ///< Reserved for future use ZE_RTAS_BUILDER_EXP_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_RTAS_BUILDER_EXP_FLAG_* ENUMs } ze_rtas_builder_exp_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Ray tracing acceleration structure builder parallel operation flags typedef uint32_t ze_rtas_parallel_operation_exp_flags_t; typedef enum _ze_rtas_parallel_operation_exp_flag_t { ZE_RTAS_PARALLEL_OPERATION_EXP_FLAG_RESERVED = ZE_BIT(0), ///< Reserved for future use ZE_RTAS_PARALLEL_OPERATION_EXP_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_RTAS_PARALLEL_OPERATION_EXP_FLAG_* ENUMs } ze_rtas_parallel_operation_exp_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Ray tracing acceleration structure builder geometry flags typedef uint32_t ze_rtas_builder_geometry_exp_flags_t; typedef enum _ze_rtas_builder_geometry_exp_flag_t { ZE_RTAS_BUILDER_GEOMETRY_EXP_FLAG_NON_OPAQUE = ZE_BIT(0), ///< non-opaque geometries invoke an any-hit shader ZE_RTAS_BUILDER_GEOMETRY_EXP_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_RTAS_BUILDER_GEOMETRY_EXP_FLAG_* ENUMs } ze_rtas_builder_geometry_exp_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Packed ray tracing acceleration structure builder geometry flags (see /// 
::ze_rtas_builder_geometry_exp_flags_t) typedef uint8_t ze_rtas_builder_packed_geometry_exp_flags_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Ray tracing acceleration structure builder instance flags typedef uint32_t ze_rtas_builder_instance_exp_flags_t; typedef enum _ze_rtas_builder_instance_exp_flag_t { ZE_RTAS_BUILDER_INSTANCE_EXP_FLAG_TRIANGLE_CULL_DISABLE = ZE_BIT(0), ///< disables culling of front-facing and back-facing triangles ZE_RTAS_BUILDER_INSTANCE_EXP_FLAG_TRIANGLE_FRONT_COUNTERCLOCKWISE = ZE_BIT(1), ///< reverses front and back face of triangles ZE_RTAS_BUILDER_INSTANCE_EXP_FLAG_TRIANGLE_FORCE_OPAQUE = ZE_BIT(2), ///< forces instanced geometry to be opaque, unless ray flag forces it to ///< be non-opaque ZE_RTAS_BUILDER_INSTANCE_EXP_FLAG_TRIANGLE_FORCE_NON_OPAQUE = ZE_BIT(3),///< forces instanced geometry to be non-opaque, unless ray flag forces it ///< to be opaque ZE_RTAS_BUILDER_INSTANCE_EXP_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_RTAS_BUILDER_INSTANCE_EXP_FLAG_* ENUMs } ze_rtas_builder_instance_exp_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Packed ray tracing acceleration structure builder instance flags (see /// ::ze_rtas_builder_instance_exp_flags_t) typedef uint8_t ze_rtas_builder_packed_instance_exp_flags_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Ray tracing acceleration structure builder build operation flags /// /// @details /// - These flags allow the application to tune the acceleration structure /// build operation. /// - The acceleration structure builder implementation might choose to use /// spatial splitting to split large or long primitives into smaller /// pieces. This may result in any-hit shaders being invoked multiple /// times for non-opaque primitives, unless /// ::ZE_RTAS_BUILDER_BUILD_OP_EXP_FLAG_NO_DUPLICATE_ANYHIT_INVOCATION is specified. 
/// - Usage of any of these flags may reduce ray tracing performance. typedef uint32_t ze_rtas_builder_build_op_exp_flags_t; typedef enum _ze_rtas_builder_build_op_exp_flag_t { ZE_RTAS_BUILDER_BUILD_OP_EXP_FLAG_COMPACT = ZE_BIT(0), ///< build more compact acceleration structure ZE_RTAS_BUILDER_BUILD_OP_EXP_FLAG_NO_DUPLICATE_ANYHIT_INVOCATION = ZE_BIT(1), ///< guarantees single any-hit shader invocation per primitive ZE_RTAS_BUILDER_BUILD_OP_EXP_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_RTAS_BUILDER_BUILD_OP_EXP_FLAG_* ENUMs } ze_rtas_builder_build_op_exp_flag_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Ray tracing acceleration structure builder build quality hint /// /// @details /// - Depending on use case different quality modes for acceleration /// structure build are supported. /// - A low-quality build builds an acceleration structure fast, but at the /// cost of some reduction in ray tracing performance. This mode is /// recommended for dynamic content, such as animated characters. /// - A medium-quality build uses a compromise between build quality and ray /// tracing performance. This mode should be used by default. /// - Higher ray tracing performance can be achieved by using a high-quality /// build, but acceleration structure build performance might be /// significantly reduced. 
typedef enum _ze_rtas_builder_build_quality_hint_exp_t { ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXP_LOW = 0, ///< build low-quality acceleration structure (fast) ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXP_MEDIUM = 1, ///< build medium-quality acceleration structure (slower) ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXP_HIGH = 2, ///< build high-quality acceleration structure (slow) ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXP_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXP_* ENUMs } ze_rtas_builder_build_quality_hint_exp_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Ray tracing acceleration structure builder geometry type typedef enum _ze_rtas_builder_geometry_type_exp_t { ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES = 0, ///< triangle mesh geometry type ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS = 1, ///< quad mesh geometry type ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL = 2, ///< procedural geometry type ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE = 3, ///< instance geometry type ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_* ENUMs } ze_rtas_builder_geometry_type_exp_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Packed ray tracing acceleration structure builder geometry type (see /// ::ze_rtas_builder_geometry_type_exp_t) typedef uint8_t ze_rtas_builder_packed_geometry_type_exp_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Ray tracing acceleration structure data buffer element format /// /// @details /// - Specifies the format of data buffer elements. /// - Data buffers may contain instancing transform matrices, triangle/quad /// vertex indices, etc... 
typedef enum _ze_rtas_builder_input_data_format_exp_t { ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3 = 0, ///< 3-component float vector (see ::ze_rtas_float3_exp_t) ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3X4_COLUMN_MAJOR = 1, ///< 3x4 affine transformation in column-major format (see ///< ::ze_rtas_transform_float3x4_column_major_exp_t) ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3X4_ALIGNED_COLUMN_MAJOR = 2,///< 3x4 affine transformation in column-major format (see ///< ::ze_rtas_transform_float3x4_aligned_column_major_exp_t) ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3X4_ROW_MAJOR = 3, ///< 3x4 affine transformation in row-major format (see ///< ::ze_rtas_transform_float3x4_row_major_exp_t) ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_AABB = 4, ///< 3-dimensional axis-aligned bounding-box (see ::ze_rtas_aabb_exp_t) ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_TRIANGLE_INDICES_UINT32 = 5, ///< Unsigned 32-bit triangle indices (see ///< ::ze_rtas_triangle_indices_uint32_exp_t) ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_QUAD_INDICES_UINT32 = 6, ///< Unsigned 32-bit quad indices (see ::ze_rtas_quad_indices_uint32_exp_t) ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_* ENUMs } ze_rtas_builder_input_data_format_exp_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Packed ray tracing acceleration structure data buffer element format /// (see ::ze_rtas_builder_input_data_format_exp_t) typedef uint8_t ze_rtas_builder_packed_input_data_format_exp_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Handle of ray tracing acceleration structure builder object typedef struct _ze_rtas_builder_exp_handle_t *ze_rtas_builder_exp_handle_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Handle of ray tracing acceleration structure builder parallel /// operation object typedef struct 
_ze_rtas_parallel_operation_exp_handle_t *ze_rtas_parallel_operation_exp_handle_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Ray tracing acceleration structure builder descriptor typedef struct _ze_rtas_builder_exp_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_rtas_builder_exp_version_t builderVersion; ///< [in] ray tracing acceleration structure builder version } ze_rtas_builder_exp_desc_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Ray tracing acceleration structure builder properties typedef struct _ze_rtas_builder_exp_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_rtas_builder_exp_flags_t flags; ///< [out] ray tracing acceleration structure builder flags size_t rtasBufferSizeBytesExpected; ///< [out] expected size (in bytes) required for acceleration structure buffer ///< - When using an acceleration structure buffer of this size, the ///< build is expected to succeed; however, it is possible that the build ///< may fail with ::ZE_RESULT_EXP_RTAS_BUILD_RETRY size_t rtasBufferSizeBytesMaxRequired; ///< [out] worst-case size (in bytes) required for acceleration structure buffer ///< - When using an acceleration structure buffer of this size, the ///< build is guaranteed to not run out of memory. size_t scratchBufferSizeBytes; ///< [out] scratch buffer size (in bytes) required for acceleration ///< structure build. 
} ze_rtas_builder_exp_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Ray tracing acceleration structure builder parallel operation /// properties typedef struct _ze_rtas_parallel_operation_exp_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). ze_rtas_parallel_operation_exp_flags_t flags; ///< [out] ray tracing acceleration structure builder parallel operation ///< flags uint32_t maxConcurrency; ///< [out] maximum number of threads that may join the parallel operation } ze_rtas_parallel_operation_exp_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Ray tracing acceleration structure device properties /// /// @details /// - This structure may be passed to ::zeDeviceGetProperties, via `pNext` /// member of ::ze_device_properties_t. /// - The implementation shall populate `format` with a value other than /// ::ZE_RTAS_FORMAT_EXP_INVALID when the device supports ray tracing. typedef struct _ze_rtas_device_exp_properties_t { ze_structure_type_t stype; ///< [in] type of this structure void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). 
ze_rtas_device_exp_flags_t flags; ///< [out] ray tracing acceleration structure device flags ze_rtas_format_exp_t rtasFormat; ///< [out] ray tracing acceleration structure format uint32_t rtasBufferAlignment; ///< [out] required alignment of acceleration structure buffer } ze_rtas_device_exp_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief A 3-component vector type typedef struct _ze_rtas_float3_exp_t { float x; ///< [in] x-coordinate of float3 vector float y; ///< [in] y-coordinate of float3 vector float z; ///< [in] z-coordinate of float3 vector } ze_rtas_float3_exp_t; /////////////////////////////////////////////////////////////////////////////// /// @brief 3x4 affine transformation in column-major layout /// /// @details /// - A 3x4 affine transformation in column major layout, consisting of vectors /// - vx=(vx_x, vx_y, vx_z), /// - vy=(vy_x, vy_y, vy_z), /// - vz=(vz_x, vz_y, vz_z), and /// - p=(p_x, p_y, p_z) /// - The transformation transforms a point (x, y, z) to: `x*vx + y*vy + /// z*vz + p`. 
typedef struct _ze_rtas_transform_float3x4_column_major_exp_t { float vx_x; ///< [in] element 0 of column 0 of 3x4 matrix float vx_y; ///< [in] element 1 of column 0 of 3x4 matrix float vx_z; ///< [in] element 2 of column 0 of 3x4 matrix float vy_x; ///< [in] element 0 of column 1 of 3x4 matrix float vy_y; ///< [in] element 1 of column 1 of 3x4 matrix float vy_z; ///< [in] element 2 of column 1 of 3x4 matrix float vz_x; ///< [in] element 0 of column 2 of 3x4 matrix float vz_y; ///< [in] element 1 of column 2 of 3x4 matrix float vz_z; ///< [in] element 2 of column 2 of 3x4 matrix float p_x; ///< [in] element 0 of column 3 of 3x4 matrix float p_y; ///< [in] element 1 of column 3 of 3x4 matrix float p_z; ///< [in] element 2 of column 3 of 3x4 matrix } ze_rtas_transform_float3x4_column_major_exp_t; /////////////////////////////////////////////////////////////////////////////// /// @brief 3x4 affine transformation in column-major layout with aligned column /// vectors /// /// @details /// - A 3x4 affine transformation in column major layout, consisting of vectors /// - vx=(vx_x, vx_y, vx_z), /// - vy=(vy_x, vy_y, vy_z), /// - vz=(vz_x, vz_y, vz_z), and /// - p=(p_x, p_y, p_z) /// - The transformation transforms a point (x, y, z) to: `x*vx + y*vy + /// z*vz + p`. /// - The column vectors are aligned to 16-bytes and pad members are /// ignored. 
typedef struct _ze_rtas_transform_float3x4_aligned_column_major_exp_t { float vx_x; ///< [in] element 0 of column 0 of 3x4 matrix float vx_y; ///< [in] element 1 of column 0 of 3x4 matrix float vx_z; ///< [in] element 2 of column 0 of 3x4 matrix float pad0; ///< [in] ignored padding float vy_x; ///< [in] element 0 of column 1 of 3x4 matrix float vy_y; ///< [in] element 1 of column 1 of 3x4 matrix float vy_z; ///< [in] element 2 of column 1 of 3x4 matrix float pad1; ///< [in] ignored padding float vz_x; ///< [in] element 0 of column 2 of 3x4 matrix float vz_y; ///< [in] element 1 of column 2 of 3x4 matrix float vz_z; ///< [in] element 2 of column 2 of 3x4 matrix float pad2; ///< [in] ignored padding float p_x; ///< [in] element 0 of column 3 of 3x4 matrix float p_y; ///< [in] element 1 of column 3 of 3x4 matrix float p_z; ///< [in] element 2 of column 3 of 3x4 matrix float pad3; ///< [in] ignored padding } ze_rtas_transform_float3x4_aligned_column_major_exp_t; /////////////////////////////////////////////////////////////////////////////// /// @brief 3x4 affine transformation in row-major layout /// /// @details /// - A 3x4 affine transformation in row-major layout, consisting of vectors /// - vx=(vx_x, vx_y, vx_z), /// - vy=(vy_x, vy_y, vy_z), /// - vz=(vz_x, vz_y, vz_z), and /// - p=(p_x, p_y, p_z) /// - The transformation transforms a point (x, y, z) to: `x*vx + y*vy + /// z*vz + p`. 
typedef struct _ze_rtas_transform_float3x4_row_major_exp_t { float vx_x; ///< [in] element 0 of row 0 of 3x4 matrix float vy_x; ///< [in] element 1 of row 0 of 3x4 matrix float vz_x; ///< [in] element 2 of row 0 of 3x4 matrix float p_x; ///< [in] element 3 of row 0 of 3x4 matrix float vx_y; ///< [in] element 0 of row 1 of 3x4 matrix float vy_y; ///< [in] element 1 of row 1 of 3x4 matrix float vz_y; ///< [in] element 2 of row 1 of 3x4 matrix float p_y; ///< [in] element 3 of row 1 of 3x4 matrix float vx_z; ///< [in] element 0 of row 2 of 3x4 matrix float vy_z; ///< [in] element 1 of row 2 of 3x4 matrix float vz_z; ///< [in] element 2 of row 2 of 3x4 matrix float p_z; ///< [in] element 3 of row 2 of 3x4 matrix } ze_rtas_transform_float3x4_row_major_exp_t; /////////////////////////////////////////////////////////////////////////////// /// @brief A 3-dimensional axis-aligned bounding-box with lower and upper bounds /// in each dimension typedef struct _ze_rtas_aabb_exp_t { ze_rtas_float3_exp_t lower; ///< [in] lower bounds of AABB ze_rtas_float3_exp_t upper; ///< [in] upper bounds of AABB } ze_rtas_aabb_exp_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Triangle represented using 3 vertex indices /// /// @details /// - Represents a triangle using 3 vertex indices that index into a vertex /// array that needs to be provided together with the index array. 
///     - The linear barycentric u/v parametrization of the triangle is defined as:
///          - (u=0, v=0) at v0,
///          - (u=1, v=0) at v1, and
///          - (u=0, v=1) at v2
typedef struct _ze_rtas_triangle_indices_uint32_exp_t
{
    uint32_t v0;                                    ///< [in] first index pointing to the first triangle vertex in vertex array
    uint32_t v1;                                    ///< [in] second index pointing to the second triangle vertex in vertex
                                                    ///< array
    uint32_t v2;                                    ///< [in] third index pointing to the third triangle vertex in vertex array

} ze_rtas_triangle_indices_uint32_exp_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Quad represented using 4 vertex indices
///
/// @details
///     - Represents a quad composed of 4 indices that index into a vertex array
///       that needs to be provided together with the index array.
///     - A quad is a triangle pair represented using 4 vertex indices v0, v1,
///       v2, v3.
///       The first triangle is made out of indices v0, v1, v3 and the second triangle
///       from indices v2, v3, v1. The piecewise linear barycentric u/v parametrization
///       of the quad is defined as:
///          - (u=0, v=0) at v0,
///          - (u=1, v=0) at v1,
///          - (u=0, v=1) at v3, and
///          - (u=1, v=1) at v2
///       This is achieved by correcting the u'/v' coordinates of the second
///       triangle by
///       *u = 1-u'* and *v = 1-v'*, yielding a piecewise linear parametrization.
typedef struct _ze_rtas_quad_indices_uint32_exp_t
{
    uint32_t v0;                                    ///< [in] first index pointing to the first quad vertex in vertex array
    uint32_t v1;                                    ///< [in] second index pointing to the second quad vertex in vertex array
    uint32_t v2;                                    ///< [in] third index pointing to the third quad vertex in vertex array
    uint32_t v3;                                    ///< [in] fourth index pointing to the fourth quad vertex in vertex array

} ze_rtas_quad_indices_uint32_exp_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Ray tracing acceleration structure builder geometry info
typedef struct _ze_rtas_builder_geometry_info_exp_t
{
    ze_rtas_builder_packed_geometry_type_exp_t geometryType;    ///< [in] geometry type

} ze_rtas_builder_geometry_info_exp_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Ray tracing acceleration structure builder triangle mesh geometry info
///
/// @details
///     - The linear barycentric u/v parametrization of the triangle is defined as:
///          - (u=0, v=0) at v0,
///          - (u=1, v=0) at v1, and
///          - (u=0, v=1) at v2
typedef struct _ze_rtas_builder_triangles_geometry_info_exp_t
{
    ze_rtas_builder_packed_geometry_type_exp_t geometryType;    ///< [in] geometry type, must be
                                                                ///< ::ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES
    ze_rtas_builder_packed_geometry_exp_flags_t geometryFlags;  ///< [in] 0 or some combination of ::ze_rtas_builder_geometry_exp_flag_t
                                                                ///< bits representing the geometry flags for all primitives of this
                                                                ///< geometry
    uint8_t geometryMask;                                       ///< [in] 8-bit geometry mask for ray masking
    ze_rtas_builder_packed_input_data_format_exp_t triangleFormat;  ///< [in] format of triangle buffer data, must be
                                                                ///< ::ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_TRIANGLE_INDICES_UINT32
    ze_rtas_builder_packed_input_data_format_exp_t vertexFormat;    ///< [in] format of vertex buffer data, must be
                                                                ///< ::ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3
    uint32_t triangleCount;                                     ///< [in] number of triangles in triangle buffer
    uint32_t vertexCount;                                       ///< [in] number of vertices in vertex buffer
    uint32_t triangleStride;                                    ///< [in] stride (in bytes) of triangles in triangle buffer
    uint32_t vertexStride;                                      ///< [in] stride (in bytes) of vertices in vertex buffer
    void* pTriangleBuffer;                                      ///< [in] pointer to array of triangle indices in specified format
    void* pVertexBuffer;                                        ///< [in] pointer to array of triangle vertices in specified format

} ze_rtas_builder_triangles_geometry_info_exp_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Ray tracing acceleration structure builder quad mesh geometry info
///
/// @details
///     - A quad is a triangle pair represented using 4 vertex indices v0, v1,
///       v2, v3.
///       The first triangle is made out of indices v0, v1, v3 and the second triangle
///       from indices v2, v3, v1. The piecewise linear barycentric u/v parametrization
///       of the quad is defined as:
///          - (u=0, v=0) at v0,
///          - (u=1, v=0) at v1,
///          - (u=0, v=1) at v3, and
///          - (u=1, v=1) at v2
///       This is achieved by correcting the u'/v' coordinates of the second
///       triangle by
///       *u = 1-u'* and *v = 1-v'*, yielding a piecewise linear parametrization.
typedef struct _ze_rtas_builder_quads_geometry_info_exp_t
{
    ze_rtas_builder_packed_geometry_type_exp_t geometryType;    ///< [in] geometry type, must be ::ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS
    ze_rtas_builder_packed_geometry_exp_flags_t geometryFlags;  ///< [in] 0 or some combination of ::ze_rtas_builder_geometry_exp_flag_t
                                                                ///< bits representing the geometry flags for all primitives of this
                                                                ///< geometry
    uint8_t geometryMask;                                       ///< [in] 8-bit geometry mask for ray masking
    ze_rtas_builder_packed_input_data_format_exp_t quadFormat;  ///< [in] format of quad buffer data, must be
                                                                ///< ::ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_QUAD_INDICES_UINT32
    ze_rtas_builder_packed_input_data_format_exp_t vertexFormat;    ///< [in] format of vertex buffer data, must be
                                                                ///< ::ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3
    uint32_t quadCount;                                         ///< [in] number of quads in quad buffer
    uint32_t vertexCount;                                       ///< [in] number of vertices in vertex buffer
    uint32_t quadStride;                                        ///< [in] stride (in bytes) of quads in quad buffer
    uint32_t vertexStride;                                      ///< [in] stride (in bytes) of vertices in vertex buffer
    void* pQuadBuffer;                                          ///< [in] pointer to array of quad indices in specified format
    void* pVertexBuffer;                                        ///< [in] pointer to array of quad vertices in specified format

} ze_rtas_builder_quads_geometry_info_exp_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief AABB callback function parameters
typedef struct _ze_rtas_geometry_aabbs_exp_cb_params_t
{
    ze_structure_type_t stype;                      ///< [in] type of this structure
    void* pNext;                                    ///< [in,out][optional] must be null or a pointer to an extension-specific
                                                    ///< structure (i.e. contains stype and pNext).
    uint32_t primID;                                ///< [in] first primitive to return bounds for
    uint32_t primIDCount;                           ///< [in] number of primitives to return bounds for
    void* pGeomUserPtr;                             ///< [in] pointer provided through geometry descriptor
    void* pBuildUserPtr;                            ///< [in] pointer provided through ::zeRTASBuilderBuildExp function
    ze_rtas_aabb_exp_t* pBoundsOut;                 ///< [out] destination buffer to write AABB bounds to

} ze_rtas_geometry_aabbs_exp_cb_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Callback function pointer type to return AABBs for a range of
///        procedural primitives
typedef void (*ze_rtas_geometry_aabbs_cb_exp_t)(
        ze_rtas_geometry_aabbs_exp_cb_params_t* params  ///< [in] callback function parameters structure
    );

///////////////////////////////////////////////////////////////////////////////
/// @brief Ray tracing acceleration structure builder procedural primitives
///        geometry info
///
/// @details
///     - A host-side bounds callback function is invoked by the acceleration
///       structure builder to query the bounds of procedural primitives on
///       demand. The callback is passed some `pGeomUserPtr` that can point to
///       an application-side representation of the procedural primitives.
///       Further, a second `pBuildUserPtr`, which is set by a parameter to
///       ::zeRTASBuilderBuildExp, is passed to the callback. This allows the
///       build to change the bounds of the procedural geometry, for example, to
///       build a BVH only over a short time range to implement multi-segment
///       motion blur.
typedef struct _ze_rtas_builder_procedural_geometry_info_exp_t
{
    ze_rtas_builder_packed_geometry_type_exp_t geometryType;    ///< [in] geometry type, must be
                                                                ///< ::ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL
    ze_rtas_builder_packed_geometry_exp_flags_t geometryFlags;  ///< [in] 0 or some combination of ::ze_rtas_builder_geometry_exp_flag_t
                                                                ///< bits representing the geometry flags for all primitives of this
                                                                ///< geometry
    uint8_t geometryMask;                                       ///< [in] 8-bit geometry mask for ray masking
    uint8_t reserved;                                           ///< [in] reserved for future use
    uint32_t primCount;                                         ///< [in] number of primitives in geometry
    ze_rtas_geometry_aabbs_cb_exp_t pfnGetBoundsCb;             ///< [in] pointer to callback function to get the axis-aligned bounding-box
                                                                ///< for a range of primitives
    void* pGeomUserPtr;                                         ///< [in] user data pointer passed to callback

} ze_rtas_builder_procedural_geometry_info_exp_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Ray tracing acceleration structure builder instance geometry info
typedef struct _ze_rtas_builder_instance_geometry_info_exp_t
{
    ze_rtas_builder_packed_geometry_type_exp_t geometryType;    ///< [in] geometry type, must be
                                                                ///< ::ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE
    ze_rtas_builder_packed_instance_exp_flags_t instanceFlags;  ///< [in] 0 or some combination of ::ze_rtas_builder_instance_exp_flag_t
                                                                ///< bits representing the instance flags of this instance
    uint8_t geometryMask;                                       ///< [in] 8-bit geometry mask for ray masking
    ze_rtas_builder_packed_input_data_format_exp_t transformFormat; ///< [in] format of the specified transformation
    uint32_t instanceUserID;                                    ///< [in] user-specified identifier for the instance
    void* pTransform;                                           ///< [in] object-to-world instance transformation in specified format
    ze_rtas_aabb_exp_t* pBounds;                                ///< [in] object-space axis-aligned bounding-box of the instanced
                                                                ///< acceleration structure
    void* pAccelerationStructure;                               ///< [in] pointer to acceleration structure to instantiate

} ze_rtas_builder_instance_geometry_info_exp_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Ray tracing acceleration structure builder build operation descriptor
typedef struct _ze_rtas_builder_build_op_exp_desc_t
{
    ze_structure_type_t stype;                      ///< [in] type of this structure
    const void* pNext;                              ///< [in][optional] must be null or a pointer to an extension-specific
                                                    ///< structure (i.e. contains stype and pNext).
    ze_rtas_format_exp_t rtasFormat;                ///< [in] ray tracing acceleration structure format
    ze_rtas_builder_build_quality_hint_exp_t buildQuality;  ///< [in] acceleration structure build quality hint
    ze_rtas_builder_build_op_exp_flags_t buildFlags;    ///< [in] 0 or some combination of ::ze_rtas_builder_build_op_exp_flag_t
                                                    ///< flags
    const ze_rtas_builder_geometry_info_exp_t** ppGeometries;   ///< [in][optional][range(0, `numGeometries`)] NULL or a valid array of
                                                    ///< pointers to geometry infos
    uint32_t numGeometries;                         ///< [in] number of geometries in geometry infos array, can be zero when
                                                    ///< `ppGeometries` is NULL

} ze_rtas_builder_build_op_exp_desc_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Creates a ray tracing acceleration structure builder object
///
/// @details
///     - The application may call this function from simultaneous threads.
///     - The implementation of this function must be thread-safe.
///     - The implementation must support ::ZE_experimental_rtas_builder
///       extension.
///
/// @returns
///     - ::ZE_RESULT_SUCCESS
///     - ::ZE_RESULT_ERROR_UNINITIALIZED
///     - ::ZE_RESULT_ERROR_DEVICE_LOST
///     - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY
///     - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY
///     - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE
///         + `nullptr == hDriver`
///     - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER
///         + `nullptr == pDescriptor`
///         + `nullptr == phBuilder`
///     - ::ZE_RESULT_ERROR_INVALID_ENUMERATION
///         + `::ZE_RTAS_BUILDER_EXP_VERSION_CURRENT < pDescriptor->builderVersion`
ZE_APIEXPORT ze_result_t ZE_APICALL
zeRTASBuilderCreateExp(
    ze_driver_handle_t hDriver,                     ///< [in] handle of driver object
    const ze_rtas_builder_exp_desc_t* pDescriptor,  ///< [in] pointer to builder descriptor
    ze_rtas_builder_exp_handle_t* phBuilder         ///< [out] handle of builder object
    );

///////////////////////////////////////////////////////////////////////////////
/// @brief Retrieves ray tracing acceleration structure builder properties
///
/// @details
///     - The application may call this function from simultaneous threads.
///     - The implementation of this function must be thread-safe.
///
/// @returns
///     - ::ZE_RESULT_SUCCESS
///     - ::ZE_RESULT_ERROR_UNINITIALIZED
///     - ::ZE_RESULT_ERROR_DEVICE_LOST
///     - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY
///     - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY
///     - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE
///         + `nullptr == hBuilder`
///     - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER
///         + `nullptr == pBuildOpDescriptor`
///         + `nullptr == pProperties`
///     - ::ZE_RESULT_ERROR_INVALID_ENUMERATION
///         + `::ZE_RTAS_FORMAT_EXP_MAX < pBuildOpDescriptor->rtasFormat`
///         + `::ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXP_HIGH < pBuildOpDescriptor->buildQuality`
///         + `0x3 < pBuildOpDescriptor->buildFlags`
ZE_APIEXPORT ze_result_t ZE_APICALL
zeRTASBuilderGetBuildPropertiesExp(
    ze_rtas_builder_exp_handle_t hBuilder,          ///< [in] handle of builder object
    const ze_rtas_builder_build_op_exp_desc_t* pBuildOpDescriptor,  ///< [in] pointer to build operation descriptor
    ze_rtas_builder_exp_properties_t* pProperties   ///< [in,out] query result for builder properties
    );

///////////////////////////////////////////////////////////////////////////////
/// @brief Checks ray tracing acceleration structure format compatibility
///
/// @details
///     - The application may call this function from simultaneous threads.
///     - The implementation of this function must be thread-safe.
///
/// @returns
///     - ::ZE_RESULT_SUCCESS
///     - ::ZE_RESULT_ERROR_UNINITIALIZED
///     - ::ZE_RESULT_ERROR_DEVICE_LOST
///     - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY
///     - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY
///     - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE
///         + `nullptr == hDriver`
///     - ::ZE_RESULT_ERROR_INVALID_ENUMERATION
///         + `::ZE_RTAS_FORMAT_EXP_MAX < rtasFormatA`
///         + `::ZE_RTAS_FORMAT_EXP_MAX < rtasFormatB`
///     - ::ZE_RESULT_SUCCESS
///         + An acceleration structure built with `rtasFormatA` is compatible with devices that report `rtasFormatB`.
///     - ::ZE_RESULT_EXP_ERROR_OPERANDS_INCOMPATIBLE
///         + An acceleration structure built with `rtasFormatA` is **not** compatible with devices that report `rtasFormatB`.
ZE_APIEXPORT ze_result_t ZE_APICALL
zeDriverRTASFormatCompatibilityCheckExp(
    ze_driver_handle_t hDriver,                     ///< [in] handle of driver object
    ze_rtas_format_exp_t rtasFormatA,               ///< [in] operand A
    ze_rtas_format_exp_t rtasFormatB                ///< [in] operand B
    );

///////////////////////////////////////////////////////////////////////////////
/// @brief Build ray tracing acceleration structure
///
/// @details
///     - This function builds an acceleration structure of the scene consisting
///       of the specified geometry information and writes the acceleration
///       structure to the provided destination buffer. All types of geometries
///       can get freely mixed inside a scene.
///     - It is the user's responsibility to manage the acceleration structure
///       buffer allocation, de-allocation, and potential prefetching to the
///       device memory. The required size of the acceleration structure buffer
///       can be queried with the ::zeRTASBuilderGetBuildPropertiesExp function.
///       The acceleration structure buffer must be a shared USM allocation and
///       should be present on the host at build time. The referenced scene data
///       (index- and vertex- buffers) can be standard host allocations, and
///       will not be referenced into by the build acceleration structure.
///     - Before an acceleration structure can be built, the user must allocate
///       the memory for the acceleration structure buffer and scratch buffer
///       using sizes based on a query for the estimated size properties.
///     - When using the "worst-case" size for the acceleration structure
///       buffer, the acceleration structure construction will never fail with ::ZE_RESULT_EXP_RTAS_BUILD_RETRY.
///     - When using the "expected" size for the acceleration structure buffer,
///       the acceleration structure construction may fail with
///       ::ZE_RESULT_EXP_RTAS_BUILD_RETRY. If this happens, the user may resize
///       their acceleration structure buffer using the returned
///       `*pRtasBufferSizeBytes` value, which will be updated with an improved
///       size estimate that will likely result in a successful build.
///     - The acceleration structure construction is run on the host and is
///       synchronous, thus after the function returns with a successful result,
///       the acceleration structure may be used.
///     - All provided data buffers must be host-accessible.
///     - The acceleration structure buffer must be a USM allocation.
///     - A successfully constructed acceleration structure is entirely
///       self-contained. There is no requirement for input data to persist
///       beyond build completion.
///     - A successfully constructed acceleration structure is non-copyable.
///     - Acceleration structure construction may be parallelized by passing a
///       valid handle to a parallel operation object and joining that parallel
///       operation using ::zeRTASParallelOperationJoinExp with user-provided
///       worker threads.
///     - **Additional Notes**
///        - "The geometry infos array, geometry infos, and scratch buffer must
///          all be standard host memory allocations."
///        - "A pointer to a geometry info can be a null pointer, in which case
///          the geometry is treated as empty."
///        - "If no parallel operation handle is provided, the build is run
///          sequentially on the current thread."
///        - "A parallel operation object may only be associated with a single
///          acceleration structure build at a time."
///
/// @returns
///     - ::ZE_RESULT_SUCCESS
///     - ::ZE_RESULT_ERROR_UNINITIALIZED
///     - ::ZE_RESULT_ERROR_DEVICE_LOST
///     - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY
///     - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY
///     - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE
///         + `nullptr == hBuilder`
///     - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER
///         + `nullptr == pBuildOpDescriptor`
///         + `nullptr == pScratchBuffer`
///         + `nullptr == pRtasBuffer`
///     - ::ZE_RESULT_ERROR_INVALID_ENUMERATION
///         + `::ZE_RTAS_FORMAT_EXP_MAX < pBuildOpDescriptor->rtasFormat`
///         + `::ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXP_HIGH < pBuildOpDescriptor->buildQuality`
///         + `0x3 < pBuildOpDescriptor->buildFlags`
///     - ::ZE_RESULT_EXP_RTAS_BUILD_DEFERRED
///         + Acceleration structure build completion is deferred to parallel operation join.
///     - ::ZE_RESULT_EXP_RTAS_BUILD_RETRY
///         + Acceleration structure build failed due to insufficient resources, retry the build operation with a larger acceleration structure buffer allocation.
///     - ::ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE
///         + Acceleration structure build failed due to parallel operation object participation in another build operation.
ZE_APIEXPORT ze_result_t ZE_APICALL
zeRTASBuilderBuildExp(
    ze_rtas_builder_exp_handle_t hBuilder,          ///< [in] handle of builder object
    const ze_rtas_builder_build_op_exp_desc_t* pBuildOpDescriptor,  ///< [in] pointer to build operation descriptor
    void* pScratchBuffer,                           ///< [in][range(0, `scratchBufferSizeBytes`)] scratch buffer to be used
                                                    ///< during acceleration structure construction
    size_t scratchBufferSizeBytes,                  ///< [in] size of scratch buffer, in bytes
    void* pRtasBuffer,                              ///< [in] pointer to destination buffer
    size_t rtasBufferSizeBytes,                     ///< [in] destination buffer size, in bytes
    ze_rtas_parallel_operation_exp_handle_t hParallelOperation, ///< [in][optional] handle to parallel operation object
    void* pBuildUserPtr,                            ///< [in][optional] pointer passed to callbacks
    ze_rtas_aabb_exp_t* pBounds,                    ///< [in,out][optional] pointer to destination address for acceleration
                                                    ///< structure bounds
    size_t* pRtasBufferSizeBytes                    ///< [out][optional] updated acceleration structure size requirement, in
                                                    ///< bytes
    );

///////////////////////////////////////////////////////////////////////////////
/// @brief Destroys a ray tracing acceleration structure builder object
///
/// @details
///     - The implementation of this function may immediately release any
///       internal Host and Device resources associated with this builder.
///     - The application must **not** call this function from simultaneous
///       threads with the same builder handle.
///     - The implementation of this function must be thread-safe.
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hBuilder` /// - ::ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE ZE_APIEXPORT ze_result_t ZE_APICALL zeRTASBuilderDestroyExp( ze_rtas_builder_exp_handle_t hBuilder ///< [in][release] handle of builder object to destroy ); /////////////////////////////////////////////////////////////////////////////// /// @brief Creates a ray tracing acceleration structure builder parallel /// operation object /// /// @details /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. /// - The implementation must support ::ZE_experimental_rtas_builder /// extension. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hDriver` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == phParallelOperation` ZE_APIEXPORT ze_result_t ZE_APICALL zeRTASParallelOperationCreateExp( ze_driver_handle_t hDriver, ///< [in] handle of driver object ze_rtas_parallel_operation_exp_handle_t* phParallelOperation ///< [out] handle of parallel operation object ); /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves ray tracing acceleration structure builder parallel /// operation properties /// /// @details /// - The application must first bind the parallel operation object to a /// build operation before it may query the parallel operation properties. /// In other words, the application must first call /// ::zeRTASBuilderBuildExp with **hParallelOperation** before calling /// this function. 
/// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hParallelOperation` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pProperties` ZE_APIEXPORT ze_result_t ZE_APICALL zeRTASParallelOperationGetPropertiesExp( ze_rtas_parallel_operation_exp_handle_t hParallelOperation, ///< [in] handle of parallel operation object ze_rtas_parallel_operation_exp_properties_t* pProperties ///< [in,out] query result for parallel operation properties ); /////////////////////////////////////////////////////////////////////////////// /// @brief Joins a parallel build operation /// /// @details /// - All worker threads return the same error code for the parallel build /// operation upon build completion /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hParallelOperation` ZE_APIEXPORT ze_result_t ZE_APICALL zeRTASParallelOperationJoinExp( ze_rtas_parallel_operation_exp_handle_t hParallelOperation ///< [in] handle of parallel operation object ); /////////////////////////////////////////////////////////////////////////////// /// @brief Destroys a ray tracing acceleration structure builder parallel /// operation object /// /// @details /// - The implementation of this function may immediately release any /// internal Host and Device resources associated with this parallel /// operation. /// - The application must **not** call this function from simultaneous /// threads with the same parallel operation handle. 
/// - The implementation of this function must be thread-safe. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hParallelOperation` ZE_APIEXPORT ze_result_t ZE_APICALL zeRTASParallelOperationDestroyExp( ze_rtas_parallel_operation_exp_handle_t hParallelOperation ///< [in][release] handle of parallel operation object to destroy ); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension APIs for Counter-based Event Pools #if !defined(__GNUC__) #pragma region counterbasedeventpool #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_EVENT_POOL_COUNTER_BASED_EXP_NAME /// @brief Counter-based Event Pools Extension Name #define ZE_EVENT_POOL_COUNTER_BASED_EXP_NAME "ZE_experimental_event_pool_counter_based" #endif // ZE_EVENT_POOL_COUNTER_BASED_EXP_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Counter-based Event Pools Extension Version(s) typedef enum _ze_event_pool_counter_based_exp_version_t { ZE_EVENT_POOL_COUNTER_BASED_EXP_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_EVENT_POOL_COUNTER_BASED_EXP_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_EVENT_POOL_COUNTER_BASED_EXP_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_EVENT_POOL_COUNTER_BASED_EXP_VERSION_* ENUMs } ze_event_pool_counter_based_exp_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Supported event flags for defining counter-based event pools. 
typedef uint32_t ze_event_pool_counter_based_exp_flags_t;
typedef enum _ze_event_pool_counter_based_exp_flag_t
{
    ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_IMMEDIATE = ZE_BIT(0),     ///< Counter-based event pool is used for immediate command lists (default)
    ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_NON_IMMEDIATE = ZE_BIT(1), ///< Counter-based event pool is for non-immediate command lists
    ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_* ENUMs

} ze_event_pool_counter_based_exp_flag_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Event pool descriptor for counter-based events. This structure may be
///        passed to ::zeEventPoolCreate as pNext member of
///        ::ze_event_pool_desc_t.
typedef struct _ze_event_pool_counter_based_exp_desc_t
{
    ze_structure_type_t stype;                      ///< [in] type of this structure
    const void* pNext;                              ///< [in][optional] must be null or a pointer to an extension-specific
                                                    ///< structure (i.e. contains stype and pNext).
    ze_event_pool_counter_based_exp_flags_t flags;  ///< [in] mode flags.
                                                    ///< must be 0 (default) or a valid value of ::ze_event_pool_counter_based_exp_flag_t
                                                    ///< default behavior is counter-based event pool is only used for
                                                    ///< immediate command lists.

} ze_event_pool_counter_based_exp_desc_t;

#if !defined(__GNUC__)
#pragma endregion
#endif
// Intel 'oneAPI' Level-Zero Extension for supporting bindless images.
#if !defined(__GNUC__)
#pragma region bindlessimages
#endif
///////////////////////////////////////////////////////////////////////////////
#ifndef ZE_BINDLESS_IMAGE_EXP_NAME
/// @brief Image Memory Properties Extension Name
#define ZE_BINDLESS_IMAGE_EXP_NAME  "ZE_experimental_bindless_image"
#endif // ZE_BINDLESS_IMAGE_EXP_NAME

///////////////////////////////////////////////////////////////////////////////
/// @brief Bindless Image Extension Version(s)
typedef enum _ze_bindless_image_exp_version_t
{
    ZE_BINDLESS_IMAGE_EXP_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ),        ///< version 1.0
    ZE_BINDLESS_IMAGE_EXP_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ),    ///< latest known version
    ZE_BINDLESS_IMAGE_EXP_VERSION_FORCE_UINT32 = 0x7fffffff,            ///< Value marking end of ZE_BINDLESS_IMAGE_EXP_VERSION_* ENUMs

} ze_bindless_image_exp_version_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Image flags for Bindless images
typedef uint32_t ze_image_bindless_exp_flags_t;
typedef enum _ze_image_bindless_exp_flag_t
{
    ZE_IMAGE_BINDLESS_EXP_FLAG_BINDLESS = ZE_BIT(0),        ///< Bindless images are created with ::zeImageCreate. The image handle
                                                            ///< created with this flag is valid on both host and device.
    ZE_IMAGE_BINDLESS_EXP_FLAG_SAMPLED_IMAGE = ZE_BIT(1),   ///< Bindless sampled images are created with ::zeImageCreate by combining
                                                            ///< BINDLESS and SAMPLED_IMAGE.
                                                            ///< Create sampled image view from bindless unsampled image using SAMPLED_IMAGE.
    ZE_IMAGE_BINDLESS_EXP_FLAG_FORCE_UINT32 = 0x7fffffff,   ///< Value marking end of ZE_IMAGE_BINDLESS_EXP_FLAG_* ENUMs

} ze_image_bindless_exp_flag_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Image descriptor for bindless images. This structure may be passed to
///        ::zeImageCreate via pNext member of ::ze_image_desc_t.
typedef struct _ze_image_bindless_exp_desc_t
{
    ze_structure_type_t stype;                      ///< [in] type of this structure
    const void* pNext;                              ///< [in][optional] must be null or a pointer to an extension-specific
                                                    ///< structure (i.e. contains stype and pNext).
    ze_image_bindless_exp_flags_t flags;            ///< [in] image flags.
                                                    ///< must be 0 (default) or a valid value of ::ze_image_bindless_exp_flag_t
                                                    ///< default behavior is bindless images are not used when creating handles
                                                    ///< via ::zeImageCreate.
                                                    ///< When the flag is passed to ::zeImageCreate, then only the memory for
                                                    ///< the image is allocated.
                                                    ///< Additional image handles can be created with ::zeImageViewCreateExt.
                                                    ///< When ::ZE_IMAGE_BINDLESS_EXP_FLAG_SAMPLED_IMAGE flag is passed,
                                                    ///< ::ze_sampler_desc_t must be attached via pNext member of ::ze_image_bindless_exp_desc_t.

} ze_image_bindless_exp_desc_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Image descriptor for bindless images created from pitched allocations.
///        This structure may be passed to ::zeImageCreate via pNext member of
///        ::ze_image_desc_t.
typedef struct _ze_image_pitched_exp_desc_t
{
    ze_structure_type_t stype;                      ///< [in] type of this structure
    const void* pNext;                              ///< [in][optional] must be null or a pointer to an extension-specific
                                                    ///< structure (i.e. contains stype and pNext).
    void* ptr;                                      ///< [in] pointer to pitched device allocation allocated using ::zeMemAllocDevice

} ze_image_pitched_exp_desc_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Device specific properties for pitched allocations
///
/// @details
///     - This structure may be passed to ::zeDeviceGetImageProperties via the
///       pNext member of ::ze_device_image_properties_t.
typedef struct _ze_device_pitched_alloc_exp_properties_t
{
    ze_structure_type_t stype;                      ///< [in] type of this structure
    void* pNext;                                    ///< [in,out][optional] must be null or a pointer to an extension-specific
                                                    ///< structure (i.e. contains stype and pNext).
    size_t maxImageLinearWidth;                     ///< [out] Maximum image linear width.
    size_t maxImageLinearHeight;                    ///< [out] Maximum image linear height.

} ze_device_pitched_alloc_exp_properties_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Allocate pitched USM memory for images
///
/// @details
///     - Retrieves pitch for 2D image given the width, height and size in bytes
///     - The memory is then allocated using ::zeMemAllocDevice by providing
///       input size calculated as the returned pitch value multiplied by image height
///     - The application may call this function from simultaneous threads
///     - The implementation of this function must be thread-safe.
///     - The implementation of this function should be lock-free.
///     - The implementation must support ::ZE_experimental_bindless_image extension.
///
/// @returns
///     - ::ZE_RESULT_SUCCESS
///     - ::ZE_RESULT_ERROR_UNINITIALIZED
///     - ::ZE_RESULT_ERROR_DEVICE_LOST
///     - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY
///     - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY
///     - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE
///         + `nullptr == hContext`
///         + `nullptr == hDevice`
ZE_APIEXPORT ze_result_t ZE_APICALL
zeMemGetPitchFor2dImage(
    ze_context_handle_t hContext,                   ///< [in] handle of the context object
    ze_device_handle_t hDevice,                     ///< [in] handle of the device
    size_t imageWidth,                              ///< [in] imageWidth
    size_t imageHeight,                             ///< [in] imageHeight
    unsigned int elementSizeInBytes,                ///< [in] Element size in bytes
    size_t * rowPitch                               ///< [out] rowPitch
    );

///////////////////////////////////////////////////////////////////////////////
/// @brief Get bindless device offset for image
///
/// @details
///     - The application may call this function from simultaneous threads
///     - The implementation of this function must be thread-safe.
///     - The implementation of this function should be lock-free.
///     - The implementation must support ::ZE_experimental_bindless_image
///       extension.
/// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hImage` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == pDeviceOffset` ZE_APIEXPORT ze_result_t ZE_APICALL zeImageGetDeviceOffsetExp( ze_image_handle_t hImage, ///< [in] handle of the image uint64_t* pDeviceOffset ///< [out] bindless device offset for image ); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension for supporting compute graphs. #if !defined(__GNUC__) #pragma region commandListClone #endif /////////////////////////////////////////////////////////////////////////////// #ifndef ZE_COMMAND_LIST_CLONE_EXP_NAME /// @brief Command List Clone Extension Name #define ZE_COMMAND_LIST_CLONE_EXP_NAME "ZE_experimental_command_list_clone" #endif // ZE_COMMAND_LIST_CLONE_EXP_NAME /////////////////////////////////////////////////////////////////////////////// /// @brief Command List Clone Extension Version(s) typedef enum _ze_command_list_clone_exp_version_t { ZE_COMMAND_LIST_CLONE_EXP_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 ZE_COMMAND_LIST_CLONE_EXP_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version ZE_COMMAND_LIST_CLONE_EXP_VERSION_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_COMMAND_LIST_CLONE_EXP_VERSION_* ENUMs } ze_command_list_clone_exp_version_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Creates a command list as the clone of another command list. /// /// @details /// - The source command list must be created with the /// ::ZE_COMMAND_LIST_FLAG_EXP_CLONEABLE flag. /// - The source command list must be closed prior to cloning. /// - The source command list may be cloned while it is running on the /// device. 
/// - The cloned command list inherits all properties of the source command /// list. /// - The cloned command list must be destroyed prior to the source command /// list. /// - The application must only use the command list for the device, or its /// sub-devices, which was provided during creation. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe. /// /// @returns /// - ::ZE_RESULT_SUCCESS /// - ::ZE_RESULT_ERROR_UNINITIALIZED /// - ::ZE_RESULT_ERROR_DEVICE_LOST /// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY /// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE /// + `nullptr == hCommandList` /// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER /// + `nullptr == phClonedCommandList` ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListCreateCloneExp( ze_command_list_handle_t hCommandList, ///< [in] handle to source command list (the command list to clone) ze_command_list_handle_t* phClonedCommandList ///< [out] pointer to handle of the cloned command list ); #if !defined(__GNUC__) #pragma endregion #endif // Intel 'oneAPI' Level-Zero Extension for supporting compute graphs. 
#if !defined(__GNUC__)
#pragma region immediateCommandListAppend
#endif
///////////////////////////////////////////////////////////////////////////////
#ifndef ZE_IMMEDIATE_COMMAND_LIST_APPEND_EXP_NAME
/// @brief Immediate Command List Append Extension Name
#define ZE_IMMEDIATE_COMMAND_LIST_APPEND_EXP_NAME  "ZE_experimental_immediate_command_list_append"
#endif // ZE_IMMEDIATE_COMMAND_LIST_APPEND_EXP_NAME

///////////////////////////////////////////////////////////////////////////////
/// @brief Immediate Command List Append Extension Version(s)
typedef enum _ze_immediate_command_list_append_exp_version_t
{
    ZE_IMMEDIATE_COMMAND_LIST_APPEND_EXP_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ),     ///< version 1.0
    ZE_IMMEDIATE_COMMAND_LIST_APPEND_EXP_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version
    ZE_IMMEDIATE_COMMAND_LIST_APPEND_EXP_VERSION_FORCE_UINT32 = 0x7fffffff,         ///< Value marking end of ZE_IMMEDIATE_COMMAND_LIST_APPEND_EXP_VERSION_* ENUMs

} ze_immediate_command_list_append_exp_version_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Appends command lists to dispatch from an immediate command list.
///
/// @details
///     - The application must call this function only with command lists
///       created with ::zeCommandListCreateImmediate.
///     - The command lists passed to this function in the `phCommandLists`
///       argument must be regular command lists (i.e. not immediate command
///       lists).
///     - The application may call this function from simultaneous threads.
///     - The implementation of this function should be lock-free.
///
/// @returns
///     - ::ZE_RESULT_SUCCESS
///     - ::ZE_RESULT_ERROR_UNINITIALIZED
///     - ::ZE_RESULT_ERROR_DEVICE_LOST
///     - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY
///     - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY
///     - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE
///         + `nullptr == hCommandListImmediate`
///     - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER
///         + `nullptr == phCommandLists`
ZE_APIEXPORT ze_result_t ZE_APICALL
zeCommandListImmediateAppendCommandListsExp(
    ze_command_list_handle_t hCommandListImmediate, ///< [in] handle of the immediate command list
    uint32_t numCommandLists,                       ///< [in] number of command lists
    ze_command_list_handle_t* phCommandLists,       ///< [in][range(0, numCommandLists)] handles of command lists
    ze_event_handle_t hSignalEvent,                 ///< [in][optional] handle of the event to signal on completion
                                                    ///<    - if not null, this event is signaled after the completion of all
                                                    ///<      appended command lists
    uint32_t numWaitEvents,                         ///< [in][optional] number of events to wait on before executing appended
                                                    ///< command lists; must be 0 if nullptr == phWaitEvents
    ze_event_handle_t* phWaitEvents                 ///< [in][optional][range(0, numWaitEvents)] handle of the events to wait
                                                    ///< on before executing appended command lists.
                                                    ///<    - if not null, all wait events must be satisfied prior to the start
                                                    ///<      of any appended command list(s)
    );

#if !defined(__GNUC__)
#pragma endregion
#endif
// Intel 'oneAPI' Level-Zero Extension for supporting compute graphs with dynamic properties.
#if !defined(__GNUC__)
#pragma region mutableCommandList
#endif
///////////////////////////////////////////////////////////////////////////////
#ifndef ZE_MUTABLE_COMMAND_LIST_EXP_NAME
/// @brief Mutable Command List Extension Name
#define ZE_MUTABLE_COMMAND_LIST_EXP_NAME  "ZE_experimental_mutable_command_list"
#endif // ZE_MUTABLE_COMMAND_LIST_EXP_NAME

///////////////////////////////////////////////////////////////////////////////
/// @brief Mutable Command List Extension Version(s)
typedef enum _ze_mutable_command_list_exp_version_t
{
    ZE_MUTABLE_COMMAND_LIST_EXP_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ),      ///< version 1.0
    ZE_MUTABLE_COMMAND_LIST_EXP_VERSION_1_1 = ZE_MAKE_VERSION( 1, 1 ),      ///< version 1.1
    ZE_MUTABLE_COMMAND_LIST_EXP_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 1 ),  ///< latest known version
    ZE_MUTABLE_COMMAND_LIST_EXP_VERSION_FORCE_UINT32 = 0x7fffffff,          ///< Value marking end of ZE_MUTABLE_COMMAND_LIST_EXP_VERSION_* ENUMs

} ze_mutable_command_list_exp_version_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Mutable command flags
typedef uint32_t ze_mutable_command_exp_flags_t;
typedef enum _ze_mutable_command_exp_flag_t
{
    ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS = ZE_BIT(0),   ///< kernel arguments
    ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_COUNT = ZE_BIT(1),        ///< kernel group count
    ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_SIZE = ZE_BIT(2),         ///< kernel group size
    ZE_MUTABLE_COMMAND_EXP_FLAG_GLOBAL_OFFSET = ZE_BIT(3),      ///< kernel global offset
    ZE_MUTABLE_COMMAND_EXP_FLAG_SIGNAL_EVENT = ZE_BIT(4),       ///< command signal event
    ZE_MUTABLE_COMMAND_EXP_FLAG_WAIT_EVENTS = ZE_BIT(5),        ///< command wait events
    ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_INSTRUCTION = ZE_BIT(6), ///< command kernel
    ZE_MUTABLE_COMMAND_EXP_FLAG_GRAPH_ARGUMENTS = ZE_BIT(7),    ///< graph arguments
    ZE_MUTABLE_COMMAND_EXP_FLAG_FORCE_UINT32 = 0x7fffffff,      ///< Value marking end of ZE_MUTABLE_COMMAND_EXP_FLAG_* ENUMs

} ze_mutable_command_exp_flag_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Mutable command identifier descriptor
typedef struct _ze_mutable_command_id_exp_desc_t
{
    ze_structure_type_t stype;                      ///< [in] type of this structure
    const void* pNext;                              ///< [in][optional] must be null or a pointer to an extension-specific
                                                    ///< structure (i.e. contains stype and pNext).
    ze_mutable_command_exp_flags_t flags;           ///< [in] mutable command flags.
                                                    ///<  - must be 0 (default, equivalent to setting all flags bar kernel
                                                    ///<    instruction), or a valid combination of ::ze_mutable_command_exp_flag_t
                                                    ///<  - in order to include kernel instruction mutation,
                                                    ///<    ::ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_INSTRUCTION must be explicitly included

} ze_mutable_command_id_exp_desc_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Mutable command list flags
typedef uint32_t ze_mutable_command_list_exp_flags_t;
typedef enum _ze_mutable_command_list_exp_flag_t
{
    ZE_MUTABLE_COMMAND_LIST_EXP_FLAG_RESERVED = ZE_BIT(0),      ///< reserved
    ZE_MUTABLE_COMMAND_LIST_EXP_FLAG_FORCE_UINT32 = 0x7fffffff, ///< Value marking end of ZE_MUTABLE_COMMAND_LIST_EXP_FLAG_* ENUMs

} ze_mutable_command_list_exp_flag_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Mutable command list properties
typedef struct _ze_mutable_command_list_exp_properties_t
{
    ze_structure_type_t stype;                      ///< [in] type of this structure
    void* pNext;                                    ///< [in,out][optional] must be null or a pointer to an extension-specific
                                                    ///< structure (i.e. contains stype and pNext).
    ze_mutable_command_list_exp_flags_t mutableCommandListFlags;///< [out] mutable command list flags
    ze_mutable_command_exp_flags_t mutableCommandFlags;         ///< [out] mutable command flags

} ze_mutable_command_list_exp_properties_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Mutable command list descriptor
typedef struct _ze_mutable_command_list_exp_desc_t
{
    ze_structure_type_t stype;                      ///< [in] type of this structure
    const void* pNext;                              ///< [in][optional] must be null or a pointer to an extension-specific
                                                    ///< structure (i.e. contains stype and pNext).
    ze_mutable_command_list_exp_flags_t flags;      ///< [in] mutable command list flags.
                                                    ///<  - must be 0 (default) or a valid combination of ::ze_mutable_command_list_exp_flag_t

} ze_mutable_command_list_exp_desc_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Mutable commands descriptor
typedef struct _ze_mutable_commands_exp_desc_t
{
    ze_structure_type_t stype;                      ///< [in] type of this structure
    const void* pNext;                              ///< [in][optional] must be null or a pointer to an extension-specific
                                                    ///< structure (i.e. contains stype and pNext).
    uint32_t flags;                                 ///< [in] must be 0, this field is reserved for future use

} ze_mutable_commands_exp_desc_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Mutable kernel argument descriptor
typedef struct _ze_mutable_kernel_argument_exp_desc_t
{
    ze_structure_type_t stype;                      ///< [in] type of this structure
    const void* pNext;                              ///< [in][optional] must be null or a pointer to an extension-specific
                                                    ///< structure (i.e. contains stype and pNext).
    uint64_t commandId;                             ///< [in] command identifier
    uint32_t argIndex;                              ///< [in] kernel argument index
    size_t argSize;                                 ///< [in] kernel argument size
    const void* pArgValue;                          ///< [in] pointer to kernel argument value

} ze_mutable_kernel_argument_exp_desc_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Mutable kernel group count descriptor
typedef struct _ze_mutable_group_count_exp_desc_t
{
    ze_structure_type_t stype;                      ///< [in] type of this structure
    const void* pNext;                              ///< [in][optional] must be null or a pointer to an extension-specific
                                                    ///< structure (i.e. contains stype and pNext).
    uint64_t commandId;                             ///< [in] command identifier
    const ze_group_count_t* pGroupCount;            ///< [in] pointer to group count

} ze_mutable_group_count_exp_desc_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Mutable kernel group size descriptor
typedef struct _ze_mutable_group_size_exp_desc_t
{
    ze_structure_type_t stype;                      ///< [in] type of this structure
    const void* pNext;                              ///< [in][optional] must be null or a pointer to an extension-specific
                                                    ///< structure (i.e. contains stype and pNext).
    uint64_t commandId;                             ///< [in] command identifier
    uint32_t groupSizeX;                            ///< [in] group size for X dimension to use for the kernel
    uint32_t groupSizeY;                            ///< [in] group size for Y dimension to use for the kernel
    uint32_t groupSizeZ;                            ///< [in] group size for Z dimension to use for the kernel

} ze_mutable_group_size_exp_desc_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Mutable kernel global offset descriptor
typedef struct _ze_mutable_global_offset_exp_desc_t
{
    ze_structure_type_t stype;                      ///< [in] type of this structure
    const void* pNext;                              ///< [in][optional] must be null or a pointer to an extension-specific
                                                    ///< structure (i.e. contains stype and pNext).
    uint64_t commandId;                             ///< [in] command identifier
    uint32_t offsetX;                               ///< [in] global offset for X dimension to use for this kernel
    uint32_t offsetY;                               ///< [in] global offset for Y dimension to use for this kernel
    uint32_t offsetZ;                               ///< [in] global offset for Z dimension to use for this kernel

} ze_mutable_global_offset_exp_desc_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Mutable graph argument descriptor
typedef struct _ze_mutable_graph_argument_exp_desc_t
{
    ze_structure_type_t stype;                      ///< [in] type of this structure
    const void* pNext;                              ///< [in][optional] must be null or a pointer to an extension-specific
                                                    ///< structure (i.e. contains stype and pNext).
    uint64_t commandId;                             ///< [in] command identifier
    uint32_t argIndex;                              ///< [in] graph argument index
    const void* pArgValue;                          ///< [in] pointer to graph argument value

} ze_mutable_graph_argument_exp_desc_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Returns a unique command identifier for the next command to be
///        appended to a command list.
///
/// @details
///     - This function may only be called for a mutable command list.
///     - This function may not be called on a closed command list.
///     - This function may be called from simultaneous threads with the same
///       command list handle.
///     - The implementation of this function should be lock-free.
///
/// @returns
///     - ::ZE_RESULT_SUCCESS
///     - ::ZE_RESULT_ERROR_UNINITIALIZED
///     - ::ZE_RESULT_ERROR_DEVICE_LOST
///     - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY
///     - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY
///     - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE
///         + `nullptr == hCommandList`
///     - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER
///         + `nullptr == desc`
///         + `nullptr == pCommandId`
///     - ::ZE_RESULT_ERROR_INVALID_ENUMERATION
///         + `0xff < desc->flags`
ZE_APIEXPORT ze_result_t ZE_APICALL
zeCommandListGetNextCommandIdExp(
    ze_command_list_handle_t hCommandList,          ///< [in] handle of the command list
    const ze_mutable_command_id_exp_desc_t* desc,   ///< [in] pointer to mutable command identifier descriptor
    uint64_t* pCommandId                            ///< [out] pointer to mutable command identifier to be written
    );

///////////////////////////////////////////////////////////////////////////////
/// @brief Returns a unique command identifier for the next command to be
///        appended to a command list. Provides possible kernel handles for
///        kernel mutation when ::ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_INSTRUCTION
///        flag is present.
///
/// @details
///     - This function may only be called for a mutable command list.
///     - This function may not be called on a closed command list.
///     - This function may be called from simultaneous threads with the same
///       command list handle.
///     - The implementation of this function should be lock-free.
///
/// @returns
///     - ::ZE_RESULT_SUCCESS
///     - ::ZE_RESULT_ERROR_UNINITIALIZED
///     - ::ZE_RESULT_ERROR_DEVICE_LOST
///     - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY
///     - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY
///     - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE
///         + `nullptr == hCommandList`
///     - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER
///         + `nullptr == desc`
///         + `nullptr == pCommandId`
///     - ::ZE_RESULT_ERROR_INVALID_ENUMERATION
///         + `0xff < desc->flags`
ZE_APIEXPORT ze_result_t ZE_APICALL
zeCommandListGetNextCommandIdWithKernelsExp(
    ze_command_list_handle_t hCommandList,          ///< [in] handle of the command list
    const ze_mutable_command_id_exp_desc_t* desc,   ///< [in][out] pointer to mutable command identifier descriptor
    uint32_t numKernels,                            ///< [in][optional] number of entries on phKernels list
    ze_kernel_handle_t* phKernels,                  ///< [in][optional][range(0, numKernels)] list of kernels that user can
                                                    ///< switch between using ::zeCommandListUpdateMutableCommandKernelsExp
                                                    ///< call
    uint64_t* pCommandId                            ///< [out] pointer to mutable command identifier to be written
    );

///////////////////////////////////////////////////////////////////////////////
/// @brief Updates mutable commands.
///
/// @details
///     - This function may only be called for a mutable command list.
///     - The application must synchronize mutable command list execution before
///       calling this function.
///     - The application must close a mutable command list after completing all
///       updates.
///     - This function must not be called from simultaneous threads with the
///       same command list handle.
///     - The implementation of this function should be lock-free.
///
/// @returns
///     - ::ZE_RESULT_SUCCESS
///     - ::ZE_RESULT_ERROR_UNINITIALIZED
///     - ::ZE_RESULT_ERROR_DEVICE_LOST
///     - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY
///     - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY
///     - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE
///         + `nullptr == hCommandList`
///     - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER
///         + `nullptr == desc`
///     - ::ZE_RESULT_ERROR_INVALID_ARGUMENT
///         + Invalid kernel argument or not matching update descriptor provided
ZE_APIEXPORT ze_result_t ZE_APICALL
zeCommandListUpdateMutableCommandsExp(
    ze_command_list_handle_t hCommandList,          ///< [in] handle of the command list
    const ze_mutable_commands_exp_desc_t* desc      ///< [in] pointer to mutable commands descriptor; multiple descriptors may
                                                    ///< be chained via `pNext` member
    );

///////////////////////////////////////////////////////////////////////////////
/// @brief Updates the signal event for a mutable command in a mutable command
///        list.
///
/// @details
///     - This function may only be called for a mutable command list.
///     - The type, scope and flags of the signal event must match those of the
///       source command.
///     - The application must synchronize mutable command list execution before
///       calling this function.
///     - The application must close a mutable command list after completing all
///       updates.
///     - This function must not be called from simultaneous threads with the
///       same command list handle.
///     - The implementation of this function should be lock-free.
///
/// @returns
///     - ::ZE_RESULT_SUCCESS
///     - ::ZE_RESULT_ERROR_UNINITIALIZED
///     - ::ZE_RESULT_ERROR_DEVICE_LOST
///     - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY
///     - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY
///     - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE
///         + `nullptr == hCommandList`
ZE_APIEXPORT ze_result_t ZE_APICALL
zeCommandListUpdateMutableCommandSignalEventExp(
    ze_command_list_handle_t hCommandList,          ///< [in] handle of the command list
    uint64_t commandId,                             ///< [in] command identifier
    ze_event_handle_t hSignalEvent                  ///< [in][optional] handle of the event to signal on completion
    );

///////////////////////////////////////////////////////////////////////////////
/// @brief Updates the wait events for a mutable command in a mutable command
///        list.
///
/// @details
///     - This function may only be called for a mutable command list.
///     - The number of wait events must match that of the source command.
///     - The type, scope and flags of the wait events must match those of the
///       source command.
///     - Passing `nullptr` as the wait events will update the command to not
///       wait on any events prior to dispatch.
///     - Passing `nullptr` as an event on event wait list will remove event
///       dependency from this wait list slot.
///     - The application must synchronize mutable command list execution before
///       calling this function.
///     - The application must close a mutable command list after completing all
///       updates.
///     - This function must not be called from simultaneous threads with the
///       same command list handle.
///     - The implementation of this function should be lock-free.
///
/// @returns
///     - ::ZE_RESULT_SUCCESS
///     - ::ZE_RESULT_ERROR_UNINITIALIZED
///     - ::ZE_RESULT_ERROR_DEVICE_LOST
///     - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY
///     - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY
///     - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE
///         + `nullptr == hCommandList`
///     - ::ZE_RESULT_ERROR_INVALID_SIZE
///         + The `numWaitEvents` parameter does not match that of the original command.
ZE_APIEXPORT ze_result_t ZE_APICALL
zeCommandListUpdateMutableCommandWaitEventsExp(
    ze_command_list_handle_t hCommandList,          ///< [in] handle of the command list
    uint64_t commandId,                             ///< [in] command identifier
    uint32_t numWaitEvents,                         ///< [in][optional] the number of wait events
    ze_event_handle_t* phWaitEvents                 ///< [in][optional][range(0, numWaitEvents)] handle of the events to wait
                                                    ///< on before launching
    );

///////////////////////////////////////////////////////////////////////////////
/// @brief Updates the kernel for a mutable command in a mutable command list.
///
/// @details
///     - This function may only be called for a mutable command list.
///     - The kernel handle must be from the provided list for given command id.
///     - The application must synchronize mutable command list execution before
///       calling this function.
///     - The application must close a mutable command list after completing all
///       updates.
///     - This function must not be called from simultaneous threads with the
///       same command list handle.
///     - This function must be called before updating kernel arguments and
///       dispatch parameters, when kernel is mutated.
///     - The implementation of this function should be lock-free.
///
/// @returns
///     - ::ZE_RESULT_SUCCESS
///     - ::ZE_RESULT_ERROR_UNINITIALIZED
///     - ::ZE_RESULT_ERROR_DEVICE_LOST
///     - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY
///     - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY
///     - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE
///         + `nullptr == hCommandList`
///     - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER
///         + `nullptr == pCommandId`
///         + `nullptr == phKernels`
///     - ::ZE_RESULT_ERROR_INVALID_KERNEL_HANDLE
///         + Invalid kernel handle provided for the mutation kernel instruction operation.
ZE_APIEXPORT ze_result_t ZE_APICALL
zeCommandListUpdateMutableCommandKernelsExp(
    ze_command_list_handle_t hCommandList,          ///< [in] handle of the command list
    uint32_t numKernels,                            ///< [in] the number of kernels to update
    uint64_t* pCommandId,                           ///< [in][range(0, numKernels)] command identifier
    ze_kernel_handle_t* phKernels                   ///< [in][range(0, numKernels)] handle of the kernel for a command
                                                    ///< identifier to switch to
    );

#if !defined(__GNUC__)
#pragma endregion
#endif
// Intel 'oneAPI' Level-Zero API Callbacks
#if !defined(__GNUC__)
#pragma region callbacks
#endif
///////////////////////////////////////////////////////////////////////////////
/// @brief Callback function parameters for zeInit
/// @details Each entry is a pointer to the parameter passed to the function;
///     allowing the callback the ability to modify the parameter's value
typedef struct _ze_init_params_t
{
    ze_init_flags_t* pflags;
} ze_init_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Callback function-pointer for zeInit
/// @param[in] params Parameters passed to this instance
/// @param[in] result Return value
/// @param[in] pTracerUserData Per-Tracer user data
/// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data
typedef void (ZE_APICALL *ze_pfnInitCb_t)(
    ze_init_params_t* params,
    ze_result_t result,
    void* pTracerUserData,
    void** ppTracerInstanceUserData
    );

///////////////////////////////////////////////////////////////////////////////
/// @brief Table of Global callback functions pointers
typedef struct _ze_global_callbacks_t
{
    ze_pfnInitCb_t pfnInitCb;
} ze_global_callbacks_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Callback function parameters for zeDriverGet
/// @details Each entry is a pointer to the parameter passed to the function;
///     allowing the callback the ability to modify the parameter's value
typedef struct _ze_driver_get_params_t
{
    uint32_t** ppCount;
    ze_driver_handle_t** pphDrivers;
} ze_driver_get_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Callback function-pointer for zeDriverGet
/// @param[in] params Parameters passed to this instance
/// @param[in] result Return value
/// @param[in] pTracerUserData Per-Tracer user data
/// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data
typedef void (ZE_APICALL *ze_pfnDriverGetCb_t)(
    ze_driver_get_params_t* params,
    ze_result_t result,
    void* pTracerUserData,
    void** ppTracerInstanceUserData
    );

///////////////////////////////////////////////////////////////////////////////
/// @brief Callback function parameters for zeDriverGetApiVersion
/// @details Each entry is a pointer to the parameter passed to the function;
///     allowing the callback the ability to modify the parameter's value
typedef struct _ze_driver_get_api_version_params_t
{
    ze_driver_handle_t* phDriver;
    ze_api_version_t** pversion;
} ze_driver_get_api_version_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Callback function-pointer for zeDriverGetApiVersion
/// @param[in] params Parameters passed to this instance
/// @param[in] result Return value
/// @param[in] pTracerUserData Per-Tracer user data
/// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data
typedef void (ZE_APICALL *ze_pfnDriverGetApiVersionCb_t)(
    ze_driver_get_api_version_params_t* params,
    ze_result_t result,
    void* pTracerUserData,
    void** ppTracerInstanceUserData
    );

///////////////////////////////////////////////////////////////////////////////
/// @brief Callback function parameters for zeDriverGetProperties
/// @details Each entry is a pointer to the parameter passed to the function;
///     allowing the callback the ability to modify the parameter's value
typedef struct _ze_driver_get_properties_params_t
{
    ze_driver_handle_t* phDriver;
    ze_driver_properties_t** ppDriverProperties;
} ze_driver_get_properties_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Callback function-pointer for zeDriverGetProperties
/// @param[in] params Parameters passed to this instance
/// @param[in] result Return value
/// @param[in] pTracerUserData Per-Tracer user data
/// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data
typedef void (ZE_APICALL *ze_pfnDriverGetPropertiesCb_t)(
    ze_driver_get_properties_params_t* params,
    ze_result_t result,
    void* pTracerUserData,
    void** ppTracerInstanceUserData
    );

///////////////////////////////////////////////////////////////////////////////
/// @brief Callback function parameters for zeDriverGetIpcProperties
/// @details Each entry is a pointer to the parameter passed to the function;
///     allowing the callback the ability to modify the parameter's value
typedef struct _ze_driver_get_ipc_properties_params_t
{
    ze_driver_handle_t* phDriver;
    ze_driver_ipc_properties_t** ppIpcProperties;
} ze_driver_get_ipc_properties_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Callback function-pointer for zeDriverGetIpcProperties
/// @param[in] params Parameters passed to this instance
/// @param[in] result Return value
/// @param[in] pTracerUserData Per-Tracer user data
/// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data
typedef void (ZE_APICALL *ze_pfnDriverGetIpcPropertiesCb_t)(
    ze_driver_get_ipc_properties_params_t* params,
    ze_result_t result,
    void* pTracerUserData,
    void** ppTracerInstanceUserData
    );

///////////////////////////////////////////////////////////////////////////////
/// @brief Callback function parameters for zeDriverGetExtensionProperties
/// @details Each entry is a pointer to the parameter passed to the function;
///     allowing the callback the ability to modify the parameter's value
typedef struct _ze_driver_get_extension_properties_params_t
{
    ze_driver_handle_t* phDriver;
    uint32_t** ppCount;
    ze_driver_extension_properties_t** ppExtensionProperties;
} ze_driver_get_extension_properties_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Callback function-pointer for zeDriverGetExtensionProperties
/// @param[in] params Parameters passed to this instance
/// @param[in] result Return value
/// @param[in] pTracerUserData Per-Tracer user data
/// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data
typedef void (ZE_APICALL *ze_pfnDriverGetExtensionPropertiesCb_t)(
    ze_driver_get_extension_properties_params_t* params,
    ze_result_t result,
    void* pTracerUserData,
    void** ppTracerInstanceUserData
    );

///////////////////////////////////////////////////////////////////////////////
/// @brief Table of Driver callback functions pointers
typedef struct _ze_driver_callbacks_t
{
    ze_pfnDriverGetCb_t pfnGetCb;
    ze_pfnDriverGetApiVersionCb_t pfnGetApiVersionCb;
    ze_pfnDriverGetPropertiesCb_t pfnGetPropertiesCb;
    ze_pfnDriverGetIpcPropertiesCb_t pfnGetIpcPropertiesCb;
    ze_pfnDriverGetExtensionPropertiesCb_t pfnGetExtensionPropertiesCb;
} ze_driver_callbacks_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Callback function parameters for zeDeviceGet
/// @details Each entry is a pointer to the parameter passed to the function;
///     allowing the callback the ability to modify the parameter's value
typedef struct _ze_device_get_params_t
{
    ze_driver_handle_t* phDriver;
    uint32_t** ppCount;
    ze_device_handle_t** pphDevices;
} ze_device_get_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Callback function-pointer for zeDeviceGet
/// @param[in] params Parameters passed to this instance
/// @param[in] result Return value
/// @param[in] pTracerUserData Per-Tracer user data
/// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data
typedef void (ZE_APICALL *ze_pfnDeviceGetCb_t)(
    ze_device_get_params_t* params,
    ze_result_t result,
    void* pTracerUserData,
    void** ppTracerInstanceUserData
    );

///////////////////////////////////////////////////////////////////////////////
/// @brief Callback function parameters for zeDeviceGetSubDevices
/// @details Each entry is a pointer to the parameter passed to the function;
///     allowing the callback the ability to modify the parameter's value
typedef struct _ze_device_get_sub_devices_params_t
{
    ze_device_handle_t* phDevice;
    uint32_t** ppCount;
    ze_device_handle_t** pphSubdevices;
} ze_device_get_sub_devices_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Callback function-pointer for zeDeviceGetSubDevices
/// @param[in] params Parameters passed to this instance
/// @param[in] result Return value
/// @param[in] pTracerUserData Per-Tracer user data
/// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data
typedef void (ZE_APICALL *ze_pfnDeviceGetSubDevicesCb_t)(
    ze_device_get_sub_devices_params_t* params,
    ze_result_t result,
    void* pTracerUserData,
    void** ppTracerInstanceUserData
    );

///////////////////////////////////////////////////////////////////////////////
/// @brief Callback function parameters for zeDeviceGetProperties
/// @details Each entry is a pointer to the parameter passed to the function;
///     allowing the callback the ability to modify the parameter's value
typedef struct _ze_device_get_properties_params_t
{
    ze_device_handle_t* phDevice;
    ze_device_properties_t** ppDeviceProperties;
} ze_device_get_properties_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Callback function-pointer for zeDeviceGetProperties
/// @param[in] params Parameters passed to this instance
/// @param[in] result Return value
/// @param[in] pTracerUserData Per-Tracer user data
/// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data
typedef void (ZE_APICALL *ze_pfnDeviceGetPropertiesCb_t)(
    ze_device_get_properties_params_t* params,
    ze_result_t result,
    void* pTracerUserData,
    void** ppTracerInstanceUserData
    );

///////////////////////////////////////////////////////////////////////////////
/// @brief Callback function parameters for zeDeviceGetComputeProperties
/// @details Each entry is a pointer to the parameter passed to the function;
///     allowing the callback the ability to modify the parameter's value
typedef struct _ze_device_get_compute_properties_params_t
{
    ze_device_handle_t* phDevice;
    ze_device_compute_properties_t** ppComputeProperties;
} ze_device_get_compute_properties_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Callback function-pointer for zeDeviceGetComputeProperties
/// @param[in] params Parameters passed to this instance
/// @param[in] result Return value
/// @param[in] pTracerUserData Per-Tracer user data
/// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data
typedef void (ZE_APICALL *ze_pfnDeviceGetComputePropertiesCb_t)(
    ze_device_get_compute_properties_params_t* params,
    ze_result_t result,
    void* pTracerUserData,
    void** ppTracerInstanceUserData
    );

///////////////////////////////////////////////////////////////////////////////
/// @brief Callback function parameters for zeDeviceGetModuleProperties
/// @details Each entry is a pointer to the parameter passed to the function;
///     allowing the callback the ability to modify the parameter's value
typedef struct _ze_device_get_module_properties_params_t
{
    ze_device_handle_t* phDevice;
    ze_device_module_properties_t** ppModuleProperties;
} ze_device_get_module_properties_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Callback function-pointer for zeDeviceGetModuleProperties
/// @param[in] params Parameters passed to this instance
/// @param[in] result Return value
/// @param[in] pTracerUserData Per-Tracer user data
/// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data
typedef void (ZE_APICALL *ze_pfnDeviceGetModulePropertiesCb_t)(
    ze_device_get_module_properties_params_t* params,
    ze_result_t result,
    void* pTracerUserData,
    void** ppTracerInstanceUserData
    );

///////////////////////////////////////////////////////////////////////////////
/// @brief Callback function parameters for zeDeviceGetCommandQueueGroupProperties
/// @details Each entry is a pointer to the parameter passed to the function;
///     allowing the callback the ability to modify the parameter's value
typedef struct _ze_device_get_command_queue_group_properties_params_t
{
    ze_device_handle_t* phDevice;
    uint32_t** ppCount;
    ze_command_queue_group_properties_t** ppCommandQueueGroupProperties;
} ze_device_get_command_queue_group_properties_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Callback function-pointer for zeDeviceGetCommandQueueGroupProperties
/// @param[in] params Parameters passed to this instance
/// @param[in] result Return value
/// @param[in] pTracerUserData Per-Tracer user data
/// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data
typedef void (ZE_APICALL *ze_pfnDeviceGetCommandQueueGroupPropertiesCb_t)(
    ze_device_get_command_queue_group_properties_params_t* params,
    ze_result_t result,
    void* pTracerUserData,
    void** ppTracerInstanceUserData
    );

///////////////////////////////////////////////////////////////////////////////
/// @brief Callback function parameters for zeDeviceGetMemoryProperties
/// @details Each entry is a pointer to the parameter passed to the function;
///     allowing the callback the ability to modify the parameter's value
typedef struct _ze_device_get_memory_properties_params_t
{
    ze_device_handle_t* phDevice;
    uint32_t** ppCount;
    ze_device_memory_properties_t** ppMemProperties;
} ze_device_get_memory_properties_params_t;
/////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeDeviceGetMemoryProperties /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnDeviceGetMemoryPropertiesCb_t)( ze_device_get_memory_properties_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeDeviceGetMemoryAccessProperties /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_device_get_memory_access_properties_params_t { ze_device_handle_t* phDevice; ze_device_memory_access_properties_t** ppMemAccessProperties; } ze_device_get_memory_access_properties_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeDeviceGetMemoryAccessProperties /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnDeviceGetMemoryAccessPropertiesCb_t)( ze_device_get_memory_access_properties_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeDeviceGetCacheProperties /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct 
_ze_device_get_cache_properties_params_t { ze_device_handle_t* phDevice; uint32_t** ppCount; ze_device_cache_properties_t** ppCacheProperties; } ze_device_get_cache_properties_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeDeviceGetCacheProperties /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnDeviceGetCachePropertiesCb_t)( ze_device_get_cache_properties_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeDeviceGetImageProperties /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_device_get_image_properties_params_t { ze_device_handle_t* phDevice; ze_device_image_properties_t** ppImageProperties; } ze_device_get_image_properties_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeDeviceGetImageProperties /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnDeviceGetImagePropertiesCb_t)( ze_device_get_image_properties_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeDeviceGetExternalMemoryProperties /// @details Each entry is a pointer to the 
parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_device_get_external_memory_properties_params_t { ze_device_handle_t* phDevice; ze_device_external_memory_properties_t** ppExternalMemoryProperties; } ze_device_get_external_memory_properties_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeDeviceGetExternalMemoryProperties /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnDeviceGetExternalMemoryPropertiesCb_t)( ze_device_get_external_memory_properties_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeDeviceGetP2PProperties /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_device_get_p2_p_properties_params_t { ze_device_handle_t* phDevice; ze_device_handle_t* phPeerDevice; ze_device_p2p_properties_t** ppP2PProperties; } ze_device_get_p2_p_properties_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeDeviceGetP2PProperties /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnDeviceGetP2PPropertiesCb_t)( ze_device_get_p2_p_properties_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); 
/////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeDeviceCanAccessPeer /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_device_can_access_peer_params_t { ze_device_handle_t* phDevice; ze_device_handle_t* phPeerDevice; ze_bool_t** pvalue; } ze_device_can_access_peer_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeDeviceCanAccessPeer /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnDeviceCanAccessPeerCb_t)( ze_device_can_access_peer_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeDeviceGetStatus /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_device_get_status_params_t { ze_device_handle_t* phDevice; } ze_device_get_status_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeDeviceGetStatus /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnDeviceGetStatusCb_t)( ze_device_get_status_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); 
/////////////////////////////////////////////////////////////////////////////// /// @brief Table of Device callback functions pointers typedef struct _ze_device_callbacks_t { ze_pfnDeviceGetCb_t pfnGetCb; ze_pfnDeviceGetSubDevicesCb_t pfnGetSubDevicesCb; ze_pfnDeviceGetPropertiesCb_t pfnGetPropertiesCb; ze_pfnDeviceGetComputePropertiesCb_t pfnGetComputePropertiesCb; ze_pfnDeviceGetModulePropertiesCb_t pfnGetModulePropertiesCb; ze_pfnDeviceGetCommandQueueGroupPropertiesCb_t pfnGetCommandQueueGroupPropertiesCb; ze_pfnDeviceGetMemoryPropertiesCb_t pfnGetMemoryPropertiesCb; ze_pfnDeviceGetMemoryAccessPropertiesCb_t pfnGetMemoryAccessPropertiesCb; ze_pfnDeviceGetCachePropertiesCb_t pfnGetCachePropertiesCb; ze_pfnDeviceGetImagePropertiesCb_t pfnGetImagePropertiesCb; ze_pfnDeviceGetExternalMemoryPropertiesCb_t pfnGetExternalMemoryPropertiesCb; ze_pfnDeviceGetP2PPropertiesCb_t pfnGetP2PPropertiesCb; ze_pfnDeviceCanAccessPeerCb_t pfnCanAccessPeerCb; ze_pfnDeviceGetStatusCb_t pfnGetStatusCb; } ze_device_callbacks_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeContextCreate /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_context_create_params_t { ze_driver_handle_t* phDriver; const ze_context_desc_t** pdesc; ze_context_handle_t** pphContext; } ze_context_create_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeContextCreate /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnContextCreateCb_t)( ze_context_create_params_t* params, ze_result_t result, void* pTracerUserData, void** 
ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeContextDestroy /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_context_destroy_params_t { ze_context_handle_t* phContext; } ze_context_destroy_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeContextDestroy /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnContextDestroyCb_t)( ze_context_destroy_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeContextGetStatus /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_context_get_status_params_t { ze_context_handle_t* phContext; } ze_context_get_status_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeContextGetStatus /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnContextGetStatusCb_t)( ze_context_get_status_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback 
function parameters for zeContextSystemBarrier /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_context_system_barrier_params_t { ze_context_handle_t* phContext; ze_device_handle_t* phDevice; } ze_context_system_barrier_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeContextSystemBarrier /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnContextSystemBarrierCb_t)( ze_context_system_barrier_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeContextMakeMemoryResident /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_context_make_memory_resident_params_t { ze_context_handle_t* phContext; ze_device_handle_t* phDevice; void** pptr; size_t* psize; } ze_context_make_memory_resident_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeContextMakeMemoryResident /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnContextMakeMemoryResidentCb_t)( ze_context_make_memory_resident_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); 
/////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeContextEvictMemory /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_context_evict_memory_params_t { ze_context_handle_t* phContext; ze_device_handle_t* phDevice; void** pptr; size_t* psize; } ze_context_evict_memory_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeContextEvictMemory /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnContextEvictMemoryCb_t)( ze_context_evict_memory_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeContextMakeImageResident /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_context_make_image_resident_params_t { ze_context_handle_t* phContext; ze_device_handle_t* phDevice; ze_image_handle_t* phImage; } ze_context_make_image_resident_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeContextMakeImageResident /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnContextMakeImageResidentCb_t)( ze_context_make_image_resident_params_t* params, ze_result_t 
result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeContextEvictImage /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_context_evict_image_params_t { ze_context_handle_t* phContext; ze_device_handle_t* phDevice; ze_image_handle_t* phImage; } ze_context_evict_image_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeContextEvictImage /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnContextEvictImageCb_t)( ze_context_evict_image_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Table of Context callback functions pointers typedef struct _ze_context_callbacks_t { ze_pfnContextCreateCb_t pfnCreateCb; ze_pfnContextDestroyCb_t pfnDestroyCb; ze_pfnContextGetStatusCb_t pfnGetStatusCb; ze_pfnContextSystemBarrierCb_t pfnSystemBarrierCb; ze_pfnContextMakeMemoryResidentCb_t pfnMakeMemoryResidentCb; ze_pfnContextEvictMemoryCb_t pfnEvictMemoryCb; ze_pfnContextMakeImageResidentCb_t pfnMakeImageResidentCb; ze_pfnContextEvictImageCb_t pfnEvictImageCb; } ze_context_callbacks_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeCommandQueueCreate /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_command_queue_create_params_t 
{ ze_context_handle_t* phContext; ze_device_handle_t* phDevice; const ze_command_queue_desc_t** pdesc; ze_command_queue_handle_t** pphCommandQueue; } ze_command_queue_create_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeCommandQueueCreate /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnCommandQueueCreateCb_t)( ze_command_queue_create_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeCommandQueueDestroy /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_command_queue_destroy_params_t { ze_command_queue_handle_t* phCommandQueue; } ze_command_queue_destroy_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeCommandQueueDestroy /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnCommandQueueDestroyCb_t)( ze_command_queue_destroy_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeCommandQueueExecuteCommandLists /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's 
value typedef struct _ze_command_queue_execute_command_lists_params_t { ze_command_queue_handle_t* phCommandQueue; uint32_t* pnumCommandLists; ze_command_list_handle_t** pphCommandLists; ze_fence_handle_t* phFence; } ze_command_queue_execute_command_lists_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeCommandQueueExecuteCommandLists /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnCommandQueueExecuteCommandListsCb_t)( ze_command_queue_execute_command_lists_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeCommandQueueSynchronize /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_command_queue_synchronize_params_t { ze_command_queue_handle_t* phCommandQueue; uint64_t* ptimeout; } ze_command_queue_synchronize_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeCommandQueueSynchronize /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnCommandQueueSynchronizeCb_t)( ze_command_queue_synchronize_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Table of CommandQueue callback functions 
pointers typedef struct _ze_command_queue_callbacks_t { ze_pfnCommandQueueCreateCb_t pfnCreateCb; ze_pfnCommandQueueDestroyCb_t pfnDestroyCb; ze_pfnCommandQueueExecuteCommandListsCb_t pfnExecuteCommandListsCb; ze_pfnCommandQueueSynchronizeCb_t pfnSynchronizeCb; } ze_command_queue_callbacks_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeCommandListCreate /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_command_list_create_params_t { ze_context_handle_t* phContext; ze_device_handle_t* phDevice; const ze_command_list_desc_t** pdesc; ze_command_list_handle_t** pphCommandList; } ze_command_list_create_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeCommandListCreate /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnCommandListCreateCb_t)( ze_command_list_create_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeCommandListCreateImmediate /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_command_list_create_immediate_params_t { ze_context_handle_t* phContext; ze_device_handle_t* phDevice; const ze_command_queue_desc_t** paltdesc; ze_command_list_handle_t** pphCommandList; } ze_command_list_create_immediate_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief 
Callback function-pointer for zeCommandListCreateImmediate /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnCommandListCreateImmediateCb_t)( ze_command_list_create_immediate_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeCommandListDestroy /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_command_list_destroy_params_t { ze_command_list_handle_t* phCommandList; } ze_command_list_destroy_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeCommandListDestroy /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnCommandListDestroyCb_t)( ze_command_list_destroy_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeCommandListClose /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_command_list_close_params_t { ze_command_list_handle_t* phCommandList; } ze_command_list_close_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeCommandListClose /// 
@param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnCommandListCloseCb_t)( ze_command_list_close_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeCommandListReset /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_command_list_reset_params_t { ze_command_list_handle_t* phCommandList; } ze_command_list_reset_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeCommandListReset /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnCommandListResetCb_t)( ze_command_list_reset_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeCommandListAppendWriteGlobalTimestamp /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_command_list_append_write_global_timestamp_params_t { ze_command_list_handle_t* phCommandList; uint64_t** pdstptr; ze_event_handle_t* phSignalEvent; uint32_t* pnumWaitEvents; ze_event_handle_t** pphWaitEvents; } ze_command_list_append_write_global_timestamp_params_t; 
/////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeCommandListAppendWriteGlobalTimestamp /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnCommandListAppendWriteGlobalTimestampCb_t)( ze_command_list_append_write_global_timestamp_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeCommandListAppendBarrier /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_command_list_append_barrier_params_t { ze_command_list_handle_t* phCommandList; ze_event_handle_t* phSignalEvent; uint32_t* pnumWaitEvents; ze_event_handle_t** pphWaitEvents; } ze_command_list_append_barrier_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeCommandListAppendBarrier /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnCommandListAppendBarrierCb_t)( ze_command_list_append_barrier_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeCommandListAppendMemoryRangesBarrier /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the 
parameter's value typedef struct _ze_command_list_append_memory_ranges_barrier_params_t { ze_command_list_handle_t* phCommandList; uint32_t* pnumRanges; const size_t** ppRangeSizes; const void*** ppRanges; ze_event_handle_t* phSignalEvent; uint32_t* pnumWaitEvents; ze_event_handle_t** pphWaitEvents; } ze_command_list_append_memory_ranges_barrier_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeCommandListAppendMemoryRangesBarrier /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnCommandListAppendMemoryRangesBarrierCb_t)( ze_command_list_append_memory_ranges_barrier_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeCommandListAppendMemoryCopy /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_command_list_append_memory_copy_params_t { ze_command_list_handle_t* phCommandList; void** pdstptr; const void** psrcptr; size_t* psize; ze_event_handle_t* phSignalEvent; uint32_t* pnumWaitEvents; ze_event_handle_t** pphWaitEvents; } ze_command_list_append_memory_copy_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeCommandListAppendMemoryCopy /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnCommandListAppendMemoryCopyCb_t)( 
ze_command_list_append_memory_copy_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeCommandListAppendMemoryFill /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_command_list_append_memory_fill_params_t { ze_command_list_handle_t* phCommandList; void** pptr; const void** ppattern; size_t* ppattern_size; size_t* psize; ze_event_handle_t* phSignalEvent; uint32_t* pnumWaitEvents; ze_event_handle_t** pphWaitEvents; } ze_command_list_append_memory_fill_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeCommandListAppendMemoryFill /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnCommandListAppendMemoryFillCb_t)( ze_command_list_append_memory_fill_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeCommandListAppendMemoryCopyRegion /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_command_list_append_memory_copy_region_params_t { ze_command_list_handle_t* phCommandList; void** pdstptr; const ze_copy_region_t** pdstRegion; uint32_t* pdstPitch; uint32_t* pdstSlicePitch; const void** psrcptr; const ze_copy_region_t** psrcRegion; uint32_t* psrcPitch; uint32_t* psrcSlicePitch; ze_event_handle_t* phSignalEvent; uint32_t* 
pnumWaitEvents; ze_event_handle_t** pphWaitEvents; } ze_command_list_append_memory_copy_region_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeCommandListAppendMemoryCopyRegion /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnCommandListAppendMemoryCopyRegionCb_t)( ze_command_list_append_memory_copy_region_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeCommandListAppendMemoryCopyFromContext /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_command_list_append_memory_copy_from_context_params_t { ze_command_list_handle_t* phCommandList; void** pdstptr; ze_context_handle_t* phContextSrc; const void** psrcptr; size_t* psize; ze_event_handle_t* phSignalEvent; uint32_t* pnumWaitEvents; ze_event_handle_t** pphWaitEvents; } ze_command_list_append_memory_copy_from_context_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeCommandListAppendMemoryCopyFromContext /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnCommandListAppendMemoryCopyFromContextCb_t)( ze_command_list_append_memory_copy_from_context_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); 
/////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeCommandListAppendImageCopy /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_command_list_append_image_copy_params_t { ze_command_list_handle_t* phCommandList; ze_image_handle_t* phDstImage; ze_image_handle_t* phSrcImage; ze_event_handle_t* phSignalEvent; uint32_t* pnumWaitEvents; ze_event_handle_t** pphWaitEvents; } ze_command_list_append_image_copy_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeCommandListAppendImageCopy /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnCommandListAppendImageCopyCb_t)( ze_command_list_append_image_copy_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeCommandListAppendImageCopyRegion /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_command_list_append_image_copy_region_params_t { ze_command_list_handle_t* phCommandList; ze_image_handle_t* phDstImage; ze_image_handle_t* phSrcImage; const ze_image_region_t** ppDstRegion; const ze_image_region_t** ppSrcRegion; ze_event_handle_t* phSignalEvent; uint32_t* pnumWaitEvents; ze_event_handle_t** pphWaitEvents; } ze_command_list_append_image_copy_region_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for 
zeCommandListAppendImageCopyRegion /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnCommandListAppendImageCopyRegionCb_t)( ze_command_list_append_image_copy_region_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeCommandListAppendImageCopyToMemory /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_command_list_append_image_copy_to_memory_params_t { ze_command_list_handle_t* phCommandList; void** pdstptr; ze_image_handle_t* phSrcImage; const ze_image_region_t** ppSrcRegion; ze_event_handle_t* phSignalEvent; uint32_t* pnumWaitEvents; ze_event_handle_t** pphWaitEvents; } ze_command_list_append_image_copy_to_memory_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeCommandListAppendImageCopyToMemory /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnCommandListAppendImageCopyToMemoryCb_t)( ze_command_list_append_image_copy_to_memory_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeCommandListAppendImageCopyFromMemory /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to 
modify the parameter's value typedef struct _ze_command_list_append_image_copy_from_memory_params_t { ze_command_list_handle_t* phCommandList; ze_image_handle_t* phDstImage; const void** psrcptr; const ze_image_region_t** ppDstRegion; ze_event_handle_t* phSignalEvent; uint32_t* pnumWaitEvents; ze_event_handle_t** pphWaitEvents; } ze_command_list_append_image_copy_from_memory_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeCommandListAppendImageCopyFromMemory /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnCommandListAppendImageCopyFromMemoryCb_t)( ze_command_list_append_image_copy_from_memory_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeCommandListAppendMemoryPrefetch /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_command_list_append_memory_prefetch_params_t { ze_command_list_handle_t* phCommandList; const void** pptr; size_t* psize; } ze_command_list_append_memory_prefetch_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeCommandListAppendMemoryPrefetch /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnCommandListAppendMemoryPrefetchCb_t)( ze_command_list_append_memory_prefetch_params_t* params, 
ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeCommandListAppendMemAdvise /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_command_list_append_mem_advise_params_t { ze_command_list_handle_t* phCommandList; ze_device_handle_t* phDevice; const void** pptr; size_t* psize; ze_memory_advice_t* padvice; } ze_command_list_append_mem_advise_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeCommandListAppendMemAdvise /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnCommandListAppendMemAdviseCb_t)( ze_command_list_append_mem_advise_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeCommandListAppendSignalEvent /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_command_list_append_signal_event_params_t { ze_command_list_handle_t* phCommandList; ze_event_handle_t* phEvent; } ze_command_list_append_signal_event_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeCommandListAppendSignalEvent /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] 
ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnCommandListAppendSignalEventCb_t)( ze_command_list_append_signal_event_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeCommandListAppendWaitOnEvents /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_command_list_append_wait_on_events_params_t { ze_command_list_handle_t* phCommandList; uint32_t* pnumEvents; ze_event_handle_t** pphEvents; } ze_command_list_append_wait_on_events_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeCommandListAppendWaitOnEvents /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnCommandListAppendWaitOnEventsCb_t)( ze_command_list_append_wait_on_events_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeCommandListAppendEventReset /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_command_list_append_event_reset_params_t { ze_command_list_handle_t* phCommandList; ze_event_handle_t* phEvent; } ze_command_list_append_event_reset_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeCommandListAppendEventReset /// @param[in] 
params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnCommandListAppendEventResetCb_t)( ze_command_list_append_event_reset_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeCommandListAppendQueryKernelTimestamps /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_command_list_append_query_kernel_timestamps_params_t { ze_command_list_handle_t* phCommandList; uint32_t* pnumEvents; ze_event_handle_t** pphEvents; void** pdstptr; const size_t** ppOffsets; ze_event_handle_t* phSignalEvent; uint32_t* pnumWaitEvents; ze_event_handle_t** pphWaitEvents; } ze_command_list_append_query_kernel_timestamps_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeCommandListAppendQueryKernelTimestamps /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnCommandListAppendQueryKernelTimestampsCb_t)( ze_command_list_append_query_kernel_timestamps_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeCommandListAppendLaunchKernel /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef 
struct _ze_command_list_append_launch_kernel_params_t { ze_command_list_handle_t* phCommandList; ze_kernel_handle_t* phKernel; const ze_group_count_t** ppLaunchFuncArgs; ze_event_handle_t* phSignalEvent; uint32_t* pnumWaitEvents; ze_event_handle_t** pphWaitEvents; } ze_command_list_append_launch_kernel_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeCommandListAppendLaunchKernel /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnCommandListAppendLaunchKernelCb_t)( ze_command_list_append_launch_kernel_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeCommandListAppendLaunchCooperativeKernel /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_command_list_append_launch_cooperative_kernel_params_t { ze_command_list_handle_t* phCommandList; ze_kernel_handle_t* phKernel; const ze_group_count_t** ppLaunchFuncArgs; ze_event_handle_t* phSignalEvent; uint32_t* pnumWaitEvents; ze_event_handle_t** pphWaitEvents; } ze_command_list_append_launch_cooperative_kernel_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeCommandListAppendLaunchCooperativeKernel /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL 
*ze_pfnCommandListAppendLaunchCooperativeKernelCb_t)( ze_command_list_append_launch_cooperative_kernel_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeCommandListAppendLaunchKernelIndirect /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_command_list_append_launch_kernel_indirect_params_t { ze_command_list_handle_t* phCommandList; ze_kernel_handle_t* phKernel; const ze_group_count_t** ppLaunchArgumentsBuffer; ze_event_handle_t* phSignalEvent; uint32_t* pnumWaitEvents; ze_event_handle_t** pphWaitEvents; } ze_command_list_append_launch_kernel_indirect_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeCommandListAppendLaunchKernelIndirect /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnCommandListAppendLaunchKernelIndirectCb_t)( ze_command_list_append_launch_kernel_indirect_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeCommandListAppendLaunchMultipleKernelsIndirect /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_command_list_append_launch_multiple_kernels_indirect_params_t { ze_command_list_handle_t* phCommandList; uint32_t* pnumKernels; ze_kernel_handle_t** pphKernels; const uint32_t** ppCountBuffer; const 
ze_group_count_t** ppLaunchArgumentsBuffer; ze_event_handle_t* phSignalEvent; uint32_t* pnumWaitEvents; ze_event_handle_t** pphWaitEvents; } ze_command_list_append_launch_multiple_kernels_indirect_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeCommandListAppendLaunchMultipleKernelsIndirect /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnCommandListAppendLaunchMultipleKernelsIndirectCb_t)( ze_command_list_append_launch_multiple_kernels_indirect_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Table of CommandList callback functions pointers typedef struct _ze_command_list_callbacks_t { ze_pfnCommandListCreateCb_t pfnCreateCb; ze_pfnCommandListCreateImmediateCb_t pfnCreateImmediateCb; ze_pfnCommandListDestroyCb_t pfnDestroyCb; ze_pfnCommandListCloseCb_t pfnCloseCb; ze_pfnCommandListResetCb_t pfnResetCb; ze_pfnCommandListAppendWriteGlobalTimestampCb_t pfnAppendWriteGlobalTimestampCb; ze_pfnCommandListAppendBarrierCb_t pfnAppendBarrierCb; ze_pfnCommandListAppendMemoryRangesBarrierCb_t pfnAppendMemoryRangesBarrierCb; ze_pfnCommandListAppendMemoryCopyCb_t pfnAppendMemoryCopyCb; ze_pfnCommandListAppendMemoryFillCb_t pfnAppendMemoryFillCb; ze_pfnCommandListAppendMemoryCopyRegionCb_t pfnAppendMemoryCopyRegionCb; ze_pfnCommandListAppendMemoryCopyFromContextCb_t pfnAppendMemoryCopyFromContextCb; ze_pfnCommandListAppendImageCopyCb_t pfnAppendImageCopyCb; ze_pfnCommandListAppendImageCopyRegionCb_t pfnAppendImageCopyRegionCb; ze_pfnCommandListAppendImageCopyToMemoryCb_t pfnAppendImageCopyToMemoryCb; ze_pfnCommandListAppendImageCopyFromMemoryCb_t 
pfnAppendImageCopyFromMemoryCb; ze_pfnCommandListAppendMemoryPrefetchCb_t pfnAppendMemoryPrefetchCb; ze_pfnCommandListAppendMemAdviseCb_t pfnAppendMemAdviseCb; ze_pfnCommandListAppendSignalEventCb_t pfnAppendSignalEventCb; ze_pfnCommandListAppendWaitOnEventsCb_t pfnAppendWaitOnEventsCb; ze_pfnCommandListAppendEventResetCb_t pfnAppendEventResetCb; ze_pfnCommandListAppendQueryKernelTimestampsCb_t pfnAppendQueryKernelTimestampsCb; ze_pfnCommandListAppendLaunchKernelCb_t pfnAppendLaunchKernelCb; ze_pfnCommandListAppendLaunchCooperativeKernelCb_t pfnAppendLaunchCooperativeKernelCb; ze_pfnCommandListAppendLaunchKernelIndirectCb_t pfnAppendLaunchKernelIndirectCb; ze_pfnCommandListAppendLaunchMultipleKernelsIndirectCb_t pfnAppendLaunchMultipleKernelsIndirectCb; } ze_command_list_callbacks_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeImageGetProperties /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_image_get_properties_params_t { ze_device_handle_t* phDevice; const ze_image_desc_t** pdesc; ze_image_properties_t** ppImageProperties; } ze_image_get_properties_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeImageGetProperties /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnImageGetPropertiesCb_t)( ze_image_get_properties_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeImageCreate /// @details Each entry is a pointer to 
the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_image_create_params_t { ze_context_handle_t* phContext; ze_device_handle_t* phDevice; const ze_image_desc_t** pdesc; ze_image_handle_t** pphImage; } ze_image_create_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeImageCreate /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnImageCreateCb_t)( ze_image_create_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeImageDestroy /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_image_destroy_params_t { ze_image_handle_t* phImage; } ze_image_destroy_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeImageDestroy /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnImageDestroyCb_t)( ze_image_destroy_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Table of Image callback functions pointers typedef struct _ze_image_callbacks_t { ze_pfnImageGetPropertiesCb_t pfnGetPropertiesCb; ze_pfnImageCreateCb_t pfnCreateCb; 
ze_pfnImageDestroyCb_t pfnDestroyCb; } ze_image_callbacks_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeMemAllocShared /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_mem_alloc_shared_params_t { ze_context_handle_t* phContext; const ze_device_mem_alloc_desc_t** pdevice_desc; const ze_host_mem_alloc_desc_t** phost_desc; size_t* psize; size_t* palignment; ze_device_handle_t* phDevice; void*** ppptr; } ze_mem_alloc_shared_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeMemAllocShared /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnMemAllocSharedCb_t)( ze_mem_alloc_shared_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeMemAllocDevice /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_mem_alloc_device_params_t { ze_context_handle_t* phContext; const ze_device_mem_alloc_desc_t** pdevice_desc; size_t* psize; size_t* palignment; ze_device_handle_t* phDevice; void*** ppptr; } ze_mem_alloc_device_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeMemAllocDevice /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] 
ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnMemAllocDeviceCb_t)( ze_mem_alloc_device_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeMemAllocHost /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_mem_alloc_host_params_t { ze_context_handle_t* phContext; const ze_host_mem_alloc_desc_t** phost_desc; size_t* psize; size_t* palignment; void*** ppptr; } ze_mem_alloc_host_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeMemAllocHost /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnMemAllocHostCb_t)( ze_mem_alloc_host_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeMemFree /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_mem_free_params_t { ze_context_handle_t* phContext; void** pptr; } ze_mem_free_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeMemFree /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void 
(ZE_APICALL *ze_pfnMemFreeCb_t)( ze_mem_free_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeMemGetAllocProperties /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_mem_get_alloc_properties_params_t { ze_context_handle_t* phContext; const void** pptr; ze_memory_allocation_properties_t** ppMemAllocProperties; ze_device_handle_t** pphDevice; } ze_mem_get_alloc_properties_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeMemGetAllocProperties /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnMemGetAllocPropertiesCb_t)( ze_mem_get_alloc_properties_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeMemGetAddressRange /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_mem_get_address_range_params_t { ze_context_handle_t* phContext; const void** pptr; void*** ppBase; size_t** ppSize; } ze_mem_get_address_range_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeMemGetAddressRange /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] 
ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnMemGetAddressRangeCb_t)( ze_mem_get_address_range_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeMemGetIpcHandle /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_mem_get_ipc_handle_params_t { ze_context_handle_t* phContext; const void** pptr; ze_ipc_mem_handle_t** ppIpcHandle; } ze_mem_get_ipc_handle_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeMemGetIpcHandle /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnMemGetIpcHandleCb_t)( ze_mem_get_ipc_handle_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeMemOpenIpcHandle /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_mem_open_ipc_handle_params_t { ze_context_handle_t* phContext; ze_device_handle_t* phDevice; ze_ipc_mem_handle_t* phandle; ze_ipc_memory_flags_t* pflags; void*** ppptr; } ze_mem_open_ipc_handle_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeMemOpenIpcHandle /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] 
pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnMemOpenIpcHandleCb_t)( ze_mem_open_ipc_handle_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeMemCloseIpcHandle /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_mem_close_ipc_handle_params_t { ze_context_handle_t* phContext; const void** pptr; } ze_mem_close_ipc_handle_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeMemCloseIpcHandle /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnMemCloseIpcHandleCb_t)( ze_mem_close_ipc_handle_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Table of Mem callback functions pointers typedef struct _ze_mem_callbacks_t { ze_pfnMemAllocSharedCb_t pfnAllocSharedCb; ze_pfnMemAllocDeviceCb_t pfnAllocDeviceCb; ze_pfnMemAllocHostCb_t pfnAllocHostCb; ze_pfnMemFreeCb_t pfnFreeCb; ze_pfnMemGetAllocPropertiesCb_t pfnGetAllocPropertiesCb; ze_pfnMemGetAddressRangeCb_t pfnGetAddressRangeCb; ze_pfnMemGetIpcHandleCb_t pfnGetIpcHandleCb; ze_pfnMemOpenIpcHandleCb_t pfnOpenIpcHandleCb; ze_pfnMemCloseIpcHandleCb_t pfnCloseIpcHandleCb; } ze_mem_callbacks_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeFenceCreate /// 
@details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_fence_create_params_t { ze_command_queue_handle_t* phCommandQueue; const ze_fence_desc_t** pdesc; ze_fence_handle_t** pphFence; } ze_fence_create_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeFenceCreate /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnFenceCreateCb_t)( ze_fence_create_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeFenceDestroy /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_fence_destroy_params_t { ze_fence_handle_t* phFence; } ze_fence_destroy_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeFenceDestroy /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnFenceDestroyCb_t)( ze_fence_destroy_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeFenceHostSynchronize /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the 
callback the ability to modify the parameter's value typedef struct _ze_fence_host_synchronize_params_t { ze_fence_handle_t* phFence; uint64_t* ptimeout; } ze_fence_host_synchronize_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeFenceHostSynchronize /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnFenceHostSynchronizeCb_t)( ze_fence_host_synchronize_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeFenceQueryStatus /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_fence_query_status_params_t { ze_fence_handle_t* phFence; } ze_fence_query_status_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeFenceQueryStatus /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnFenceQueryStatusCb_t)( ze_fence_query_status_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeFenceReset /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct 
_ze_fence_reset_params_t { ze_fence_handle_t* phFence; } ze_fence_reset_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeFenceReset /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnFenceResetCb_t)( ze_fence_reset_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Table of Fence callback functions pointers typedef struct _ze_fence_callbacks_t { ze_pfnFenceCreateCb_t pfnCreateCb; ze_pfnFenceDestroyCb_t pfnDestroyCb; ze_pfnFenceHostSynchronizeCb_t pfnHostSynchronizeCb; ze_pfnFenceQueryStatusCb_t pfnQueryStatusCb; ze_pfnFenceResetCb_t pfnResetCb; } ze_fence_callbacks_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeEventPoolCreate /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_event_pool_create_params_t { ze_context_handle_t* phContext; const ze_event_pool_desc_t** pdesc; uint32_t* pnumDevices; ze_device_handle_t** pphDevices; ze_event_pool_handle_t** pphEventPool; } ze_event_pool_create_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeEventPoolCreate /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnEventPoolCreateCb_t)( ze_event_pool_create_params_t* params, ze_result_t 
result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeEventPoolDestroy /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_event_pool_destroy_params_t { ze_event_pool_handle_t* phEventPool; } ze_event_pool_destroy_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeEventPoolDestroy /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnEventPoolDestroyCb_t)( ze_event_pool_destroy_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeEventPoolGetIpcHandle /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_event_pool_get_ipc_handle_params_t { ze_event_pool_handle_t* phEventPool; ze_ipc_event_pool_handle_t** pphIpc; } ze_event_pool_get_ipc_handle_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeEventPoolGetIpcHandle /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnEventPoolGetIpcHandleCb_t)( ze_event_pool_get_ipc_handle_params_t* params, ze_result_t result, void* pTracerUserData, 
void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeEventPoolOpenIpcHandle /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_event_pool_open_ipc_handle_params_t { ze_context_handle_t* phContext; ze_ipc_event_pool_handle_t* phIpc; ze_event_pool_handle_t** pphEventPool; } ze_event_pool_open_ipc_handle_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeEventPoolOpenIpcHandle /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnEventPoolOpenIpcHandleCb_t)( ze_event_pool_open_ipc_handle_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeEventPoolCloseIpcHandle /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_event_pool_close_ipc_handle_params_t { ze_event_pool_handle_t* phEventPool; } ze_event_pool_close_ipc_handle_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeEventPoolCloseIpcHandle /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnEventPoolCloseIpcHandleCb_t)( 
ze_event_pool_close_ipc_handle_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Table of EventPool callback functions pointers typedef struct _ze_event_pool_callbacks_t { ze_pfnEventPoolCreateCb_t pfnCreateCb; ze_pfnEventPoolDestroyCb_t pfnDestroyCb; ze_pfnEventPoolGetIpcHandleCb_t pfnGetIpcHandleCb; ze_pfnEventPoolOpenIpcHandleCb_t pfnOpenIpcHandleCb; ze_pfnEventPoolCloseIpcHandleCb_t pfnCloseIpcHandleCb; } ze_event_pool_callbacks_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeEventCreate /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_event_create_params_t { ze_event_pool_handle_t* phEventPool; const ze_event_desc_t** pdesc; ze_event_handle_t** pphEvent; } ze_event_create_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeEventCreate /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnEventCreateCb_t)( ze_event_create_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeEventDestroy /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_event_destroy_params_t { ze_event_handle_t* phEvent; } ze_event_destroy_params_t; 
/////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeEventDestroy /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnEventDestroyCb_t)( ze_event_destroy_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeEventHostSignal /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_event_host_signal_params_t { ze_event_handle_t* phEvent; } ze_event_host_signal_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeEventHostSignal /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnEventHostSignalCb_t)( ze_event_host_signal_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeEventHostSynchronize /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_event_host_synchronize_params_t { ze_event_handle_t* phEvent; uint64_t* ptimeout; } ze_event_host_synchronize_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback 
function-pointer for zeEventHostSynchronize /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnEventHostSynchronizeCb_t)( ze_event_host_synchronize_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeEventQueryStatus /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_event_query_status_params_t { ze_event_handle_t* phEvent; } ze_event_query_status_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeEventQueryStatus /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnEventQueryStatusCb_t)( ze_event_query_status_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeEventHostReset /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_event_host_reset_params_t { ze_event_handle_t* phEvent; } ze_event_host_reset_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeEventHostReset /// @param[in] params Parameters passed to this instance /// @param[in] result 
Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnEventHostResetCb_t)( ze_event_host_reset_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeEventQueryKernelTimestamp /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_event_query_kernel_timestamp_params_t { ze_event_handle_t* phEvent; ze_kernel_timestamp_result_t** pdstptr; } ze_event_query_kernel_timestamp_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeEventQueryKernelTimestamp /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnEventQueryKernelTimestampCb_t)( ze_event_query_kernel_timestamp_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Table of Event callback functions pointers typedef struct _ze_event_callbacks_t { ze_pfnEventCreateCb_t pfnCreateCb; ze_pfnEventDestroyCb_t pfnDestroyCb; ze_pfnEventHostSignalCb_t pfnHostSignalCb; ze_pfnEventHostSynchronizeCb_t pfnHostSynchronizeCb; ze_pfnEventQueryStatusCb_t pfnQueryStatusCb; ze_pfnEventHostResetCb_t pfnHostResetCb; ze_pfnEventQueryKernelTimestampCb_t pfnQueryKernelTimestampCb; } ze_event_callbacks_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for 
zeModuleCreate /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_module_create_params_t { ze_context_handle_t* phContext; ze_device_handle_t* phDevice; const ze_module_desc_t** pdesc; ze_module_handle_t** pphModule; ze_module_build_log_handle_t** pphBuildLog; } ze_module_create_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeModuleCreate /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnModuleCreateCb_t)( ze_module_create_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeModuleDestroy /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_module_destroy_params_t { ze_module_handle_t* phModule; } ze_module_destroy_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeModuleDestroy /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnModuleDestroyCb_t)( ze_module_destroy_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeModuleDynamicLink /// 
@details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_module_dynamic_link_params_t { uint32_t* pnumModules; ze_module_handle_t** pphModules; ze_module_build_log_handle_t** pphLinkLog; } ze_module_dynamic_link_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeModuleDynamicLink /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnModuleDynamicLinkCb_t)( ze_module_dynamic_link_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeModuleGetNativeBinary /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_module_get_native_binary_params_t { ze_module_handle_t* phModule; size_t** ppSize; uint8_t** ppModuleNativeBinary; } ze_module_get_native_binary_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeModuleGetNativeBinary /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnModuleGetNativeBinaryCb_t)( ze_module_get_native_binary_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function 
parameters for zeModuleGetGlobalPointer /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_module_get_global_pointer_params_t { ze_module_handle_t* phModule; const char** ppGlobalName; size_t** ppSize; void*** ppptr; } ze_module_get_global_pointer_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeModuleGetGlobalPointer /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnModuleGetGlobalPointerCb_t)( ze_module_get_global_pointer_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeModuleGetKernelNames /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_module_get_kernel_names_params_t { ze_module_handle_t* phModule; uint32_t** ppCount; const char*** ppNames; } ze_module_get_kernel_names_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeModuleGetKernelNames /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnModuleGetKernelNamesCb_t)( ze_module_get_kernel_names_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); 
/////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeModuleGetProperties /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_module_get_properties_params_t { ze_module_handle_t* phModule; ze_module_properties_t** ppModuleProperties; } ze_module_get_properties_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeModuleGetProperties /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnModuleGetPropertiesCb_t)( ze_module_get_properties_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeModuleGetFunctionPointer /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_module_get_function_pointer_params_t { ze_module_handle_t* phModule; const char** ppFunctionName; void*** ppfnFunction; } ze_module_get_function_pointer_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeModuleGetFunctionPointer /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnModuleGetFunctionPointerCb_t)( ze_module_get_function_pointer_params_t* params, ze_result_t result, void* 
pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Table of Module callback functions pointers typedef struct _ze_module_callbacks_t { ze_pfnModuleCreateCb_t pfnCreateCb; ze_pfnModuleDestroyCb_t pfnDestroyCb; ze_pfnModuleDynamicLinkCb_t pfnDynamicLinkCb; ze_pfnModuleGetNativeBinaryCb_t pfnGetNativeBinaryCb; ze_pfnModuleGetGlobalPointerCb_t pfnGetGlobalPointerCb; ze_pfnModuleGetKernelNamesCb_t pfnGetKernelNamesCb; ze_pfnModuleGetPropertiesCb_t pfnGetPropertiesCb; ze_pfnModuleGetFunctionPointerCb_t pfnGetFunctionPointerCb; } ze_module_callbacks_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeModuleBuildLogDestroy /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_module_build_log_destroy_params_t { ze_module_build_log_handle_t* phModuleBuildLog; } ze_module_build_log_destroy_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeModuleBuildLogDestroy /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnModuleBuildLogDestroyCb_t)( ze_module_build_log_destroy_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeModuleBuildLogGetString /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_module_build_log_get_string_params_t { 
ze_module_build_log_handle_t* phModuleBuildLog; size_t** ppSize; char** ppBuildLog; } ze_module_build_log_get_string_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeModuleBuildLogGetString /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnModuleBuildLogGetStringCb_t)( ze_module_build_log_get_string_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Table of ModuleBuildLog callback functions pointers typedef struct _ze_module_build_log_callbacks_t { ze_pfnModuleBuildLogDestroyCb_t pfnDestroyCb; ze_pfnModuleBuildLogGetStringCb_t pfnGetStringCb; } ze_module_build_log_callbacks_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeKernelCreate /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_kernel_create_params_t { ze_module_handle_t* phModule; const ze_kernel_desc_t** pdesc; ze_kernel_handle_t** pphKernel; } ze_kernel_create_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeKernelCreate /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnKernelCreateCb_t)( ze_kernel_create_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); 
/////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeKernelDestroy /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_kernel_destroy_params_t { ze_kernel_handle_t* phKernel; } ze_kernel_destroy_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeKernelDestroy /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnKernelDestroyCb_t)( ze_kernel_destroy_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeKernelSetCacheConfig /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_kernel_set_cache_config_params_t { ze_kernel_handle_t* phKernel; ze_cache_config_flags_t* pflags; } ze_kernel_set_cache_config_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeKernelSetCacheConfig /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnKernelSetCacheConfigCb_t)( ze_kernel_set_cache_config_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// 
/// @brief Callback function parameters for zeKernelSetGroupSize /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_kernel_set_group_size_params_t { ze_kernel_handle_t* phKernel; uint32_t* pgroupSizeX; uint32_t* pgroupSizeY; uint32_t* pgroupSizeZ; } ze_kernel_set_group_size_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeKernelSetGroupSize /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnKernelSetGroupSizeCb_t)( ze_kernel_set_group_size_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeKernelSuggestGroupSize /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_kernel_suggest_group_size_params_t { ze_kernel_handle_t* phKernel; uint32_t* pglobalSizeX; uint32_t* pglobalSizeY; uint32_t* pglobalSizeZ; uint32_t** pgroupSizeX; uint32_t** pgroupSizeY; uint32_t** pgroupSizeZ; } ze_kernel_suggest_group_size_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeKernelSuggestGroupSize /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnKernelSuggestGroupSizeCb_t)( ze_kernel_suggest_group_size_params_t* params, 
ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeKernelSuggestMaxCooperativeGroupCount /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_kernel_suggest_max_cooperative_group_count_params_t { ze_kernel_handle_t* phKernel; uint32_t** ptotalGroupCount; } ze_kernel_suggest_max_cooperative_group_count_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeKernelSuggestMaxCooperativeGroupCount /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnKernelSuggestMaxCooperativeGroupCountCb_t)( ze_kernel_suggest_max_cooperative_group_count_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeKernelSetArgumentValue /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_kernel_set_argument_value_params_t { ze_kernel_handle_t* phKernel; uint32_t* pargIndex; size_t* pargSize; const void** ppArgValue; } ze_kernel_set_argument_value_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeKernelSetArgumentValue /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] 
ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnKernelSetArgumentValueCb_t)( ze_kernel_set_argument_value_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeKernelSetIndirectAccess /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_kernel_set_indirect_access_params_t { ze_kernel_handle_t* phKernel; ze_kernel_indirect_access_flags_t* pflags; } ze_kernel_set_indirect_access_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeKernelSetIndirectAccess /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnKernelSetIndirectAccessCb_t)( ze_kernel_set_indirect_access_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeKernelGetIndirectAccess /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_kernel_get_indirect_access_params_t { ze_kernel_handle_t* phKernel; ze_kernel_indirect_access_flags_t** ppFlags; } ze_kernel_get_indirect_access_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeKernelGetIndirectAccess /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] 
pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnKernelGetIndirectAccessCb_t)( ze_kernel_get_indirect_access_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeKernelGetSourceAttributes /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_kernel_get_source_attributes_params_t { ze_kernel_handle_t* phKernel; uint32_t** ppSize; char*** ppString; } ze_kernel_get_source_attributes_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeKernelGetSourceAttributes /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnKernelGetSourceAttributesCb_t)( ze_kernel_get_source_attributes_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeKernelGetProperties /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_kernel_get_properties_params_t { ze_kernel_handle_t* phKernel; ze_kernel_properties_t** ppKernelProperties; } ze_kernel_get_properties_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeKernelGetProperties /// @param[in] params Parameters passed to this instance /// 
@param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnKernelGetPropertiesCb_t)( ze_kernel_get_properties_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeKernelGetName /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_kernel_get_name_params_t { ze_kernel_handle_t* phKernel; size_t** ppSize; char** ppName; } ze_kernel_get_name_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeKernelGetName /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnKernelGetNameCb_t)( ze_kernel_get_name_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Table of Kernel callback functions pointers typedef struct _ze_kernel_callbacks_t { ze_pfnKernelCreateCb_t pfnCreateCb; ze_pfnKernelDestroyCb_t pfnDestroyCb; ze_pfnKernelSetCacheConfigCb_t pfnSetCacheConfigCb; ze_pfnKernelSetGroupSizeCb_t pfnSetGroupSizeCb; ze_pfnKernelSuggestGroupSizeCb_t pfnSuggestGroupSizeCb; ze_pfnKernelSuggestMaxCooperativeGroupCountCb_t pfnSuggestMaxCooperativeGroupCountCb; ze_pfnKernelSetArgumentValueCb_t pfnSetArgumentValueCb; ze_pfnKernelSetIndirectAccessCb_t pfnSetIndirectAccessCb; ze_pfnKernelGetIndirectAccessCb_t pfnGetIndirectAccessCb; ze_pfnKernelGetSourceAttributesCb_t 
pfnGetSourceAttributesCb; ze_pfnKernelGetPropertiesCb_t pfnGetPropertiesCb; ze_pfnKernelGetNameCb_t pfnGetNameCb; } ze_kernel_callbacks_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeSamplerCreate /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_sampler_create_params_t { ze_context_handle_t* phContext; ze_device_handle_t* phDevice; const ze_sampler_desc_t** pdesc; ze_sampler_handle_t** pphSampler; } ze_sampler_create_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeSamplerCreate /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnSamplerCreateCb_t)( ze_sampler_create_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeSamplerDestroy /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_sampler_destroy_params_t { ze_sampler_handle_t* phSampler; } ze_sampler_destroy_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeSamplerDestroy /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnSamplerDestroyCb_t)( ze_sampler_destroy_params_t* 
params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Table of Sampler callback functions pointers typedef struct _ze_sampler_callbacks_t { ze_pfnSamplerCreateCb_t pfnCreateCb; ze_pfnSamplerDestroyCb_t pfnDestroyCb; } ze_sampler_callbacks_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zePhysicalMemCreate /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_physical_mem_create_params_t { ze_context_handle_t* phContext; ze_device_handle_t* phDevice; ze_physical_mem_desc_t** pdesc; ze_physical_mem_handle_t** pphPhysicalMemory; } ze_physical_mem_create_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zePhysicalMemCreate /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnPhysicalMemCreateCb_t)( ze_physical_mem_create_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zePhysicalMemDestroy /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_physical_mem_destroy_params_t { ze_context_handle_t* phContext; ze_physical_mem_handle_t* phPhysicalMemory; } ze_physical_mem_destroy_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for 
zePhysicalMemDestroy /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnPhysicalMemDestroyCb_t)( ze_physical_mem_destroy_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Table of PhysicalMem callback functions pointers typedef struct _ze_physical_mem_callbacks_t { ze_pfnPhysicalMemCreateCb_t pfnCreateCb; ze_pfnPhysicalMemDestroyCb_t pfnDestroyCb; } ze_physical_mem_callbacks_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeVirtualMemReserve /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_virtual_mem_reserve_params_t { ze_context_handle_t* phContext; const void** ppStart; size_t* psize; void*** ppptr; } ze_virtual_mem_reserve_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeVirtualMemReserve /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnVirtualMemReserveCb_t)( ze_virtual_mem_reserve_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeVirtualMemFree /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the 
parameter's value typedef struct _ze_virtual_mem_free_params_t { ze_context_handle_t* phContext; const void** pptr; size_t* psize; } ze_virtual_mem_free_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeVirtualMemFree /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnVirtualMemFreeCb_t)( ze_virtual_mem_free_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeVirtualMemQueryPageSize /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_virtual_mem_query_page_size_params_t { ze_context_handle_t* phContext; ze_device_handle_t* phDevice; size_t* psize; size_t** ppagesize; } ze_virtual_mem_query_page_size_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeVirtualMemQueryPageSize /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnVirtualMemQueryPageSizeCb_t)( ze_virtual_mem_query_page_size_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeVirtualMemMap /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the 
ability to modify the parameter's value typedef struct _ze_virtual_mem_map_params_t { ze_context_handle_t* phContext; const void** pptr; size_t* psize; ze_physical_mem_handle_t* phPhysicalMemory; size_t* poffset; ze_memory_access_attribute_t* paccess; } ze_virtual_mem_map_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeVirtualMemMap /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnVirtualMemMapCb_t)( ze_virtual_mem_map_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeVirtualMemUnmap /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_virtual_mem_unmap_params_t { ze_context_handle_t* phContext; const void** pptr; size_t* psize; } ze_virtual_mem_unmap_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeVirtualMemUnmap /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnVirtualMemUnmapCb_t)( ze_virtual_mem_unmap_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeVirtualMemSetAccessAttribute /// @details Each entry is a pointer to the parameter passed to 
the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_virtual_mem_set_access_attribute_params_t { ze_context_handle_t* phContext; const void** pptr; size_t* psize; ze_memory_access_attribute_t* paccess; } ze_virtual_mem_set_access_attribute_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeVirtualMemSetAccessAttribute /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnVirtualMemSetAccessAttributeCb_t)( ze_virtual_mem_set_access_attribute_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function parameters for zeVirtualMemGetAccessAttribute /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct _ze_virtual_mem_get_access_attribute_params_t { ze_context_handle_t* phContext; const void** pptr; size_t* psize; ze_memory_access_attribute_t** paccess; size_t** poutSize; } ze_virtual_mem_get_access_attribute_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Callback function-pointer for zeVirtualMemGetAccessAttribute /// @param[in] params Parameters passed to this instance /// @param[in] result Return value /// @param[in] pTracerUserData Per-Tracer user data /// @param[in,out] ppTracerInstanceUserData Per-Tracer, Per-Instance user data typedef void (ZE_APICALL *ze_pfnVirtualMemGetAccessAttributeCb_t)( ze_virtual_mem_get_access_attribute_params_t* params, ze_result_t result, void* pTracerUserData, void** ppTracerInstanceUserData ); 
/////////////////////////////////////////////////////////////////////////////// /// @brief Table of VirtualMem callback functions pointers typedef struct _ze_virtual_mem_callbacks_t { ze_pfnVirtualMemReserveCb_t pfnReserveCb; ze_pfnVirtualMemFreeCb_t pfnFreeCb; ze_pfnVirtualMemQueryPageSizeCb_t pfnQueryPageSizeCb; ze_pfnVirtualMemMapCb_t pfnMapCb; ze_pfnVirtualMemUnmapCb_t pfnUnmapCb; ze_pfnVirtualMemSetAccessAttributeCb_t pfnSetAccessAttributeCb; ze_pfnVirtualMemGetAccessAttributeCb_t pfnGetAccessAttributeCb; } ze_virtual_mem_callbacks_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Container for all callbacks typedef struct _ze_callbacks_t { ze_global_callbacks_t Global; ze_driver_callbacks_t Driver; ze_device_callbacks_t Device; ze_context_callbacks_t Context; ze_command_queue_callbacks_t CommandQueue; ze_command_list_callbacks_t CommandList; ze_fence_callbacks_t Fence; ze_event_pool_callbacks_t EventPool; ze_event_callbacks_t Event; ze_image_callbacks_t Image; ze_module_callbacks_t Module; ze_module_build_log_callbacks_t ModuleBuildLog; ze_kernel_callbacks_t Kernel; ze_sampler_callbacks_t Sampler; ze_physical_mem_callbacks_t PhysicalMem; ze_mem_callbacks_t Mem; ze_virtual_mem_callbacks_t VirtualMem; } ze_callbacks_t; #if !defined(__GNUC__) #pragma endregion #endif #if defined(__cplusplus) } // extern "C" #endif #endif // _ZE_API_Hlevel-zero-raytracing-support-1.2.3/level_zero/ze_api_exp_ext.h000066400000000000000000000076101514453371700250140ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "ze_wrapper.h" /* except for following enums EXP and EXT API is binary compatible */ enum ze_result_helper_t { ZE_RESULT_HELPER_EXP_RTAS_BUILD_RETRY = ZE_RESULT_EXP_RTAS_BUILD_RETRY, ZE_RESULT_HELPER_EXP_RTAS_BUILD_DEFERRED = ZE_RESULT_EXP_RTAS_BUILD_DEFERRED, ZE_RESULT_HELPER_EXT_RTAS_BUILD_RETRY = ZE_RESULT_EXT_RTAS_BUILD_RETRY, 
ZE_RESULT_HELPER_EXT_RTAS_BUILD_DEFERRED = ZE_RESULT_EXT_RTAS_BUILD_DEFERRED, }; enum ze_structure_type_helper_t { ZE_STRUCTURE_TYPE_HELPER_RTAS_BUILDER_EXP_DESC = ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_DESC, ZE_STRUCTURE_TYPE_HELPER_RTAS_BUILDER_BUILD_OP_EXP_DESC = ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_EXP_DESC, ZE_STRUCTURE_TYPE_HELPER_RTAS_BUILDER_EXP_PROPERTIES = ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_PROPERTIES, ZE_STRUCTURE_TYPE_HELPER_RTAS_PARALLEL_OPERATION_EXP_PROPERTIES = ZE_STRUCTURE_TYPE_RTAS_PARALLEL_OPERATION_EXP_PROPERTIES, ZE_STRUCTURE_TYPE_HELPER_RTAS_DEVICE_EXP_PROPERTIES = ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXP_PROPERTIES, ZE_STRUCTURE_TYPE_HELPER_RTAS_GEOMETRY_AABBS_EXP_CB_PARAMS = ZE_STRUCTURE_TYPE_RTAS_GEOMETRY_AABBS_EXP_CB_PARAMS, ZE_STRUCTURE_TYPE_HELPER_RTAS_BUILDER_EXT_DESC = ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXT_DESC, ZE_STRUCTURE_TYPE_HELPER_RTAS_BUILDER_BUILD_OP_EXT_DESC = ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_EXT_DESC, ZE_STRUCTURE_TYPE_HELPER_RTAS_BUILDER_EXT_PROPERTIES = ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXT_PROPERTIES, ZE_STRUCTURE_TYPE_HELPER_RTAS_PARALLEL_OPERATION_EXT_PROPERTIES = ZE_STRUCTURE_TYPE_RTAS_PARALLEL_OPERATION_EXT_PROPERTIES, ZE_STRUCTURE_TYPE_HELPER_RTAS_DEVICE_EXT_PROPERTIES = ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXT_PROPERTIES, ZE_STRUCTURE_TYPE_HELPER_RTAS_GEOMETRY_AABBS_EXT_CB_PARAMS = ZE_STRUCTURE_TYPE_RTAS_GEOMETRY_AABBS_EXT_CB_PARAMS, }; #define ZE_RESULT_EXP_RTAS_BUILD_RETRY (ze_result_t)((aty == EXP_API) ? ZE_RESULT_HELPER_EXP_RTAS_BUILD_RETRY : ZE_RESULT_HELPER_EXT_RTAS_BUILD_RETRY) #define ZE_RESULT_EXP_RTAS_BUILD_DEFERRED (ze_result_t)((aty == EXP_API) ? ZE_RESULT_HELPER_EXP_RTAS_BUILD_DEFERRED : ZE_RESULT_HELPER_EXT_RTAS_BUILD_DEFERRED) #define ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_DESC (ze_structure_type_t)((aty == EXP_API) ? ZE_STRUCTURE_TYPE_HELPER_RTAS_BUILDER_EXP_DESC : ZE_STRUCTURE_TYPE_HELPER_RTAS_BUILDER_EXT_DESC) #define ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_EXP_DESC (ze_structure_type_t)((aty == EXP_API) ? 
ZE_STRUCTURE_TYPE_HELPER_RTAS_BUILDER_BUILD_OP_EXP_DESC : ZE_STRUCTURE_TYPE_HELPER_RTAS_BUILDER_BUILD_OP_EXT_DESC) #define ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_PROPERTIES (ze_structure_type_t)((aty == EXP_API) ? ZE_STRUCTURE_TYPE_HELPER_RTAS_BUILDER_EXP_PROPERTIES : ZE_STRUCTURE_TYPE_HELPER_RTAS_BUILDER_EXT_PROPERTIES) #define ZE_STRUCTURE_TYPE_RTAS_PARALLEL_OPERATION_EXP_PROPERTIES (ze_structure_type_t)((aty == EXP_API) ? ZE_STRUCTURE_TYPE_HELPER_RTAS_PARALLEL_OPERATION_EXP_PROPERTIES : ZE_STRUCTURE_TYPE_HELPER_RTAS_PARALLEL_OPERATION_EXT_PROPERTIES) #define ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXP_PROPERTIES (ze_structure_type_t)((aty == EXP_API) ? ZE_STRUCTURE_TYPE_HELPER_RTAS_DEVICE_EXP_PROPERTIES : ZE_STRUCTURE_TYPE_HELPER_RTAS_DEVICE_EXT_PROPERTIES) #define ZE_STRUCTURE_TYPE_RTAS_GEOMETRY_AABBS_EXP_CB_PARAMS (ze_structure_type_t)((aty == EXP_API) ? ZE_STRUCTURE_TYPE_HELPER_RTAS_GEOMETRY_AABBS_EXP_CB_PARAMS : ZE_STRUCTURE_TYPE_HELPER_RTAS_GEOMETRY_AABBS_EXT_CB_PARAMS) #define ZE_RESULT_EXT_RTAS_BUILD_RETRY #define ZE_RESULT_EXT_RTAS_BUILD_DEFERRED #define ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXT_DESC #define ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_EXT_DESC #define ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXT_PROPERTIES #define ZE_STRUCTURE_TYPE_RTAS_PARALLEL_OPERATION_EXT_PROPERTIES #define ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXT_PROPERTIES #define ZE_STRUCTURE_TYPE_RTAS_GEOMETRY_AABBS_EXT_CB_PARAMS level-zero-raytracing-support-1.2.3/level_zero/ze_wrapper.cpp000066400000000000000000000761761514453371700245370ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 /* detect Linux platform */ #if defined(linux) || defined(__linux__) || defined(__LINUX__) # if !defined(__LINUX__) # define __LINUX__ # endif #endif #if defined(__LINUX__) #include #else #include #endif #include "ze_wrapper.h" #include "../rtbuild/rtbuild.h" #include #include #include #include #include ZeWrapper::RTAS_BUILD_MODE ZeWrapper::rtas_builder = ZeWrapper::INVALID; 
static std::mutex zeWrapperMutex; static void* handle = nullptr; static decltype(zeMemFree)* zeMemFreeInternal = nullptr; static decltype(zeMemAllocShared)* zeMemAllocSharedInternal = nullptr; static decltype(zeMemAllocDevice)* zeMemAllocDeviceInternal = nullptr; static decltype(zeDriverGetExtensionProperties)* zeDriverGetExtensionPropertiesInternal = nullptr; static decltype(zeDeviceGetProperties)* zeDeviceGetPropertiesInternal = nullptr; static decltype(zeDeviceGetModuleProperties)* zeDeviceGetModulePropertiesInternal = nullptr; static decltype(zeCommandListAppendMemoryCopy)* zeCommandListAppendMemoryCopyInternal = nullptr; static decltype(zeCommandQueueExecuteCommandLists)* zeCommandQueueExecuteCommandListsInternal = nullptr; static decltype(zeCommandListCreate)* zeCommandListCreateInternal = nullptr; static decltype(zeCommandListClose)* zeCommandListCloseInternal = nullptr; static decltype(zeCommandListDestroy)* zeCommandListDestroyInternal = nullptr; /* EXP version of API */ static decltype(zeRTASBuilderCreateExp)* zeRTASBuilderCreateExpInternal = nullptr; static decltype(zeRTASBuilderDestroyExp)* zeRTASBuilderDestroyExpInternal = nullptr; static decltype(zeDriverRTASFormatCompatibilityCheckExp)* zeDriverRTASFormatCompatibilityCheckExpInternal = nullptr; static decltype(zeRTASBuilderGetBuildPropertiesExp)* zeRTASBuilderGetBuildPropertiesExpInternal = nullptr; static decltype(zeRTASBuilderBuildExp)* zeRTASBuilderBuildExpInternal = nullptr; static decltype(zeRTASParallelOperationCreateExp)* zeRTASParallelOperationCreateExpInternal = nullptr; static decltype(zeRTASParallelOperationDestroyExp)* zeRTASParallelOperationDestroyExpInternal = nullptr; static decltype(zeRTASParallelOperationGetPropertiesExp)* zeRTASParallelOperationGetPropertiesExpInternal = nullptr; static decltype(zeRTASParallelOperationJoinExp)* zeRTASParallelOperationJoinExpInternal = nullptr; /* EXT version of API */ static decltype(zeRTASBuilderCreateExt)* zeRTASBuilderCreateExtInternal = nullptr; 
static decltype(zeRTASBuilderDestroyExt)* zeRTASBuilderDestroyExtInternal = nullptr; static decltype(zeDriverRTASFormatCompatibilityCheckExt)* zeDriverRTASFormatCompatibilityCheckExtInternal = nullptr; static decltype(zeRTASBuilderGetBuildPropertiesExt)* zeRTASBuilderGetBuildPropertiesExtInternal = nullptr; static decltype(zeRTASBuilderBuildExt)* zeRTASBuilderBuildExtInternal = nullptr; static decltype(zeRTASBuilderCommandListAppendCopyExt)* zeRTASBuilderCommandListAppendCopyExtInternal = nullptr; static decltype(zeRTASParallelOperationCreateExt)* zeRTASParallelOperationCreateExtInternal = nullptr; static decltype(zeRTASParallelOperationDestroyExt)* zeRTASParallelOperationDestroyExtInternal = nullptr; static decltype(zeRTASParallelOperationGetPropertiesExt)* zeRTASParallelOperationGetPropertiesExtInternal = nullptr; static decltype(zeRTASParallelOperationJoinExt)* zeRTASParallelOperationJoinExtInternal = nullptr; template T find_symbol(void* handle, std::string const& symbol) { #if defined(__LINUX__) T result = (T) dlsym(handle, symbol.c_str()); #else T result = (T) GetProcAddress((HMODULE)handle, symbol.c_str()); #endif if (!result) { throw std::runtime_error("level_zero wrapper: symbol " + symbol + " not found"); } return result; } void* load_module() { #if defined(__LINUX__) void* handle = dlopen(ZE_LOADER_NAME_LINUX,RTLD_LAZY); if (!handle) { throw std::runtime_error("module " ZE_LOADER_NAME_LINUX " not found"); } #else void* handle = LoadLibraryExA(ZE_LOADER_NAME_WINDOWS,NULL,LOAD_LIBRARY_SEARCH_SYSTEM32); if (!handle) { throw std::runtime_error("module " ZE_LOADER_NAME_WINDOWS " not found"); } #endif return handle; } void unload_module(void* handle) { if (handle) { #if defined(__LINUX__) dlclose(handle); #else FreeLibrary((HMODULE)handle); #endif } } ZeWrapper::~ZeWrapper() { unload_module(handle); } ze_result_t selectLevelZeroRTASBuilderExp(ze_driver_handle_t hDriver) { if (ZeWrapper::rtas_builder == ZeWrapper::LEVEL_ZERO) return ZE_RESULT_SUCCESS; auto 
zeRTASBuilderCreateExpTemp = find_symbol(handle,"zeRTASBuilderCreateExp"); auto zeRTASBuilderDestroyExpTemp = find_symbol(handle,"zeRTASBuilderDestroyExp"); ze_rtas_builder_exp_desc_t builderDesc = { ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_DESC }; ze_rtas_builder_exp_handle_t hBuilder = nullptr; ze_result_t err = zeRTASBuilderCreateExpTemp(hDriver, &builderDesc, &hBuilder); /* when ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE is reported extension cannot get loaded */ if (err == ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE) return err; if (err == ZE_RESULT_SUCCESS) zeRTASBuilderDestroyExpTemp(hBuilder); zeRTASBuilderCreateExpInternal = zeRTASBuilderCreateExpTemp; zeRTASBuilderDestroyExpInternal = zeRTASBuilderDestroyExpTemp; zeDriverRTASFormatCompatibilityCheckExpInternal = find_symbol(handle,"zeDriverRTASFormatCompatibilityCheckExp"); zeRTASBuilderGetBuildPropertiesExpInternal = find_symbol(handle,"zeRTASBuilderGetBuildPropertiesExp"); zeRTASBuilderBuildExpInternal = find_symbol(handle,"zeRTASBuilderBuildExp"); zeRTASParallelOperationCreateExpInternal = find_symbol(handle,"zeRTASParallelOperationCreateExp"); zeRTASParallelOperationDestroyExpInternal = find_symbol(handle,"zeRTASParallelOperationDestroyExp"); zeRTASParallelOperationGetPropertiesExpInternal = find_symbol(handle,"zeRTASParallelOperationGetPropertiesExp"); zeRTASParallelOperationJoinExpInternal = find_symbol(handle,"zeRTASParallelOperationJoinExp"); ZeWrapper::rtas_builder = ZeWrapper::LEVEL_ZERO; return ZE_RESULT_SUCCESS; } ze_result_t selectLevelZeroRTASBuilderExt(ze_driver_handle_t hDriver) { if (ZeWrapper::rtas_builder == ZeWrapper::LEVEL_ZERO) return ZE_RESULT_SUCCESS; auto zeRTASBuilderCreateExtTemp = find_symbol(handle,"zeRTASBuilderCreateExt"); auto zeRTASBuilderDestroyExtTemp = find_symbol(handle,"zeRTASBuilderDestroyExt"); ze_rtas_builder_ext_desc_t builderDesc = { ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXT_DESC }; ze_rtas_builder_ext_handle_t hBuilder = nullptr; ze_result_t err = zeRTASBuilderCreateExtTemp(hDriver, 
&builderDesc, &hBuilder); /* when ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE is reported extension cannot get loaded */ if (err == ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE) return err; if (err == ZE_RESULT_SUCCESS) zeRTASBuilderDestroyExtTemp(hBuilder); zeRTASBuilderCreateExtInternal = zeRTASBuilderCreateExtTemp; zeRTASBuilderDestroyExtInternal = zeRTASBuilderDestroyExtTemp; zeDriverRTASFormatCompatibilityCheckExtInternal = find_symbol(handle,"zeDriverRTASFormatCompatibilityCheckExt"); zeRTASBuilderGetBuildPropertiesExtInternal = find_symbol(handle,"zeRTASBuilderGetBuildPropertiesExt"); zeRTASBuilderBuildExtInternal = find_symbol(handle,"zeRTASBuilderBuildExt"); zeRTASBuilderCommandListAppendCopyExtInternal = find_symbol(handle,"zeRTASBuilderCommandListAppendCopyExt"); zeRTASParallelOperationCreateExtInternal = find_symbol(handle,"zeRTASParallelOperationCreateExt"); zeRTASParallelOperationDestroyExtInternal = find_symbol(handle,"zeRTASParallelOperationDestroyExt"); zeRTASParallelOperationGetPropertiesExtInternal = find_symbol(handle,"zeRTASParallelOperationGetPropertiesExt"); zeRTASParallelOperationJoinExtInternal = find_symbol(handle,"zeRTASParallelOperationJoinExt"); ZeWrapper::rtas_builder = ZeWrapper::LEVEL_ZERO; return ZE_RESULT_SUCCESS; } ze_result_t selectLevelZeroRTASBuilder(API_TY aty, ze_driver_handle_t hDriver) { switch (aty) { case EXP_API: return selectLevelZeroRTASBuilderExp(hDriver); case EXT_API: return selectLevelZeroRTASBuilderExt(hDriver); default: assert(false); return ZE_RESULT_ERROR_UNKNOWN; } } ze_result_t selectInternalRTASBuilder() { #if defined(ZE_RAYTRACING_DISABLE_INTERNAL_BUILDER) throw std::runtime_error("internal builder disabled at compile time"); #else if (ZeWrapper::rtas_builder == ZeWrapper::INTERNAL) return ZE_RESULT_SUCCESS; zeRTASBuilderCreateExpInternal = &zeRTASBuilderCreateExpImpl; zeRTASBuilderDestroyExpInternal = &zeRTASBuilderDestroyExpImpl; zeDriverRTASFormatCompatibilityCheckExpInternal = 
&zeDriverRTASFormatCompatibilityCheckExpImpl; zeRTASBuilderGetBuildPropertiesExpInternal = &zeRTASBuilderGetBuildPropertiesExpImpl; zeRTASBuilderBuildExpInternal = &zeRTASBuilderBuildExpImpl; zeRTASBuilderCommandListAppendCopyExtInternal = zeCommandListAppendMemoryCopyInternal; zeRTASParallelOperationCreateExpInternal = &zeRTASParallelOperationCreateExpImpl; zeRTASParallelOperationDestroyExpInternal = &zeRTASParallelOperationDestroyExpImpl; zeRTASParallelOperationGetPropertiesExpInternal = &zeRTASParallelOperationGetPropertiesExpImpl; zeRTASParallelOperationJoinExpInternal = &zeRTASParallelOperationJoinExpImpl; zeRTASBuilderCreateExtInternal = &zeRTASBuilderCreateExtImpl; zeRTASBuilderDestroyExtInternal = &zeRTASBuilderDestroyExtImpl; zeDriverRTASFormatCompatibilityCheckExtInternal = &zeDriverRTASFormatCompatibilityCheckExtImpl; zeRTASBuilderGetBuildPropertiesExtInternal = &zeRTASBuilderGetBuildPropertiesExtImpl; zeRTASBuilderBuildExtInternal = &zeRTASBuilderBuildExtImpl; zeRTASParallelOperationCreateExtInternal = &zeRTASParallelOperationCreateExtImpl; zeRTASParallelOperationDestroyExtInternal = &zeRTASParallelOperationDestroyExtImpl; zeRTASParallelOperationGetPropertiesExtInternal = &zeRTASParallelOperationGetPropertiesExtImpl; zeRTASParallelOperationJoinExtInternal = &zeRTASParallelOperationJoinExtImpl; ZeWrapper::rtas_builder = ZeWrapper::INTERNAL; #endif return ZE_RESULT_SUCCESS; } ze_result_t ZeWrapper::init() { std::lock_guard lock(zeWrapperMutex); if (handle) return ZE_RESULT_SUCCESS; try { handle = load_module(); zeMemFreeInternal = find_symbol(handle, "zeMemFree"); zeMemAllocSharedInternal = find_symbol(handle, "zeMemAllocShared"); zeMemAllocDeviceInternal = find_symbol(handle, "zeMemAllocDevice"); zeDriverGetExtensionPropertiesInternal = find_symbol(handle, "zeDriverGetExtensionProperties"); zeDeviceGetPropertiesInternal = find_symbol(handle, "zeDeviceGetProperties"); zeDeviceGetModulePropertiesInternal = find_symbol(handle, 
"zeDeviceGetModuleProperties"); zeCommandListAppendMemoryCopyInternal = find_symbol(handle, "zeCommandListAppendMemoryCopy"); zeCommandQueueExecuteCommandListsInternal = find_symbol(handle, "zeCommandQueueExecuteCommandLists"); zeCommandListCreateInternal = find_symbol(handle, "zeCommandListCreate"); zeCommandListCloseInternal = find_symbol(handle, "zeCommandListClose"); zeCommandListDestroyInternal = find_symbol(handle, "zeCommandListDestroy"); } catch (std::exception& e) { return ZE_RESULT_ERROR_UNKNOWN; } return ZE_RESULT_SUCCESS; } ze_result_t ZeWrapper::initRTASBuilder(API_TY aty, ze_driver_handle_t hDriver, RTAS_BUILD_MODE rtas_build_mode) { std::lock_guard lock(zeWrapperMutex); /* only select rtas builder once! */ if (rtas_builder == rtas_build_mode) return ZE_RESULT_SUCCESS; try { if (rtas_build_mode == RTAS_BUILD_MODE::INTERNAL) return selectInternalRTASBuilder(); else if (rtas_build_mode == RTAS_BUILD_MODE::LEVEL_ZERO) return selectLevelZeroRTASBuilder(aty,hDriver); else throw std::runtime_error("internal error"); } catch (std::exception& e) { return ZE_RESULT_ERROR_UNKNOWN; } return ZE_RESULT_ERROR_UNKNOWN; } ze_result_t ZeWrapper::zeMemFree(ze_context_handle_t context, void* ptr) { if (!handle || !zeMemFreeInternal) throw std::runtime_error("ZeWrapper not initialized, call ZeWrapper::init() first."); return zeMemFreeInternal(context, ptr); } ze_result_t ZeWrapper::zeMemAllocShared(ze_context_handle_t context, const ze_device_mem_alloc_desc_t* descd, const ze_host_mem_alloc_desc_t* desch, size_t s0, size_t s1, ze_device_handle_t ze_handle, void** ptr) { if (!handle || !zeMemAllocSharedInternal) throw std::runtime_error("ZeWrapper not initialized, call ZeWrapper::init() first."); return zeMemAllocSharedInternal(context, descd, desch, s0, s1, ze_handle, ptr); } ze_result_t ZeWrapper::zeMemAllocDevice(ze_context_handle_t context, const ze_device_mem_alloc_desc_t* descd, size_t s0, size_t s1, ze_device_handle_t ze_handle, void** ptr) { if (!handle || 
!zeMemAllocDeviceInternal) throw std::runtime_error("ZeWrapper not initialized, call ZeWrapper::init() first."); return zeMemAllocDeviceInternal(context, descd, s0, s1, ze_handle, ptr); } ze_result_t ZeWrapper::zeDriverGetExtensionProperties(ze_driver_handle_t ze_handle, uint32_t* ptr, ze_driver_extension_properties_t* props) { if (!handle || !zeDriverGetExtensionPropertiesInternal) throw std::runtime_error("ZeWrapper not initialized, call ZeWrapper::init() first."); return zeDriverGetExtensionPropertiesInternal(ze_handle, ptr, props); } #define VALIDATE(arg) \ {\ ze_result_t result = validate(arg);\ if (result != ZE_RESULT_SUCCESS) return result; \ } ze_result_t validate(ze_device_handle_t hDevice) { if (hDevice == nullptr) return ZE_RESULT_ERROR_INVALID_NULL_HANDLE; return ZE_RESULT_SUCCESS; } ze_result_t validate(ze_rtas_device_exp_properties_t* pProperties) { if (pProperties == nullptr) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; //if (pProperties->stype != ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXP_PROPERTIES) // return ZE_RESULT_ERROR_INVALID_ENUMERATION; //if (!checkDescChain((zet_base_desc_t_*)pProperties)) //return ZE_RESULT_ERROR_INVALID_ENUMERATION; return ZE_RESULT_SUCCESS; } ze_result_t zeDeviceGetRTASPropertiesExp( const ze_device_handle_t hDevice, ze_rtas_device_exp_properties_t* pProperties ) { /* input validation */ VALIDATE(hDevice); VALIDATE(pProperties); /* fill properties */ pProperties->flags = 0; pProperties->rtasFormat = (ze_rtas_format_exp_t) ZE_RTAS_DEVICE_FORMAT_EXP_INVALID; pProperties->rtasBufferAlignment = 128; /* check for supported device ID */ ze_device_properties_t device_props{ ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES }; ze_result_t status = ZeWrapper::zeDeviceGetProperties(hDevice, &device_props); if (status != ZE_RESULT_SUCCESS) return status; /* check for Intel vendor */ const uint32_t vendor_id = device_props.vendorId; const uint32_t device_id = device_props.deviceId; if (vendor_id != 0x8086) return ZE_RESULT_ERROR_UNKNOWN; /* disabling of 
device check through env variable */ const char* disable_device_check = getenv("EMBREE_DISABLE_DEVICEID_CHECK"); if (disable_device_check && strcmp(disable_device_check,"1") == 0) { pProperties->rtasFormat = (ze_rtas_format_exp_t) ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_1; return ZE_RESULT_SUCCESS; } /* DG2 */ const bool dg2 = (0x4F80 <= device_id && device_id <= 0x4F88) || (0x5690 <= device_id && device_id <= 0x5698) || (0x56A0 <= device_id && device_id <= 0x56A6) || (0x56B0 <= device_id && device_id <= 0x56B3) || (0x56C0 <= device_id && device_id <= 0x56C1); if (dg2) { pProperties->rtasFormat = (ze_rtas_format_exp_t) ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_1; return ZE_RESULT_SUCCESS; } /* BMG */ const bool bmg = (device_id == 0xE202) || (device_id == 0xE20B) || (device_id == 0xE20C) || (device_id == 0xE20D) || (device_id == 0xE210) || (device_id == 0xE212) || (device_id == 0xE216); if (bmg) { pProperties->rtasFormat = (ze_rtas_format_exp_t) ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_1; return ZE_RESULT_SUCCESS; } /* PVC */ const bool pvc = (0x0BD5 <= device_id && device_id <= 0x0BDB) || (device_id == 0x0B69) || (device_id == 0x0B6E) || (device_id == 0x0BD4); if (pvc) { pProperties->rtasFormat = (ze_rtas_format_exp_t) ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_1; return ZE_RESULT_SUCCESS; } /* MTL */ const bool mtl = (device_id == 0x7D40) || (device_id == 0x7D55) || (device_id == 0x7DD5) || (device_id == 0x7D45) || (device_id == 0x7D60); if (mtl) { pProperties->rtasFormat = (ze_rtas_format_exp_t) ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_1; return ZE_RESULT_SUCCESS; } /* LNL */ bool lnl = device_id == 0x64A0 || device_id == 0x6420; if (lnl) { pProperties->rtasFormat = (ze_rtas_format_exp_t) ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_1; return ZE_RESULT_SUCCESS; } /* PTL */ bool ptl = device_id == 0xB080 || device_id == 0xB081 || device_id == 0xB082 || device_id == 0xB083 || device_id == 0xB08F || device_id == 0xB090 || device_id == 0xB0A0 || device_id == 0xB0B0; if (ptl) { pProperties->rtasFormat = 
(ze_rtas_format_exp_t) ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_2; return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_UNKNOWN; } ze_result_t ZeWrapper::zeDeviceGetProperties(ze_device_handle_t ze_handle, ze_device_properties_t* props) { if (!handle || !zeDeviceGetPropertiesInternal) throw std::runtime_error("ZeWrapper not initialized, call ZeWrapper::init() first."); if (ZeWrapper::rtas_builder == ZeWrapper::INTERNAL) { if (props->pNext && (((ze_base_properties_t*)props->pNext)->stype == ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXP_PROPERTIES || ((ze_base_properties_t*)props->pNext)->stype == ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXT_PROPERTIES)) { ze_result_t result = zeDeviceGetRTASPropertiesExp(ze_handle, (ze_rtas_device_exp_properties_t*)props->pNext); if (result != ZE_RESULT_SUCCESS) return result; void* pNext = props->pNext; props->pNext = ((ze_base_properties_t*)props->pNext)->pNext; result = zeDeviceGetPropertiesInternal(ze_handle, props); props->pNext = pNext; return result; } } return zeDeviceGetPropertiesInternal(ze_handle, props); } ze_result_t ZeWrapper::zeDeviceGetModuleProperties(ze_device_handle_t ze_handle, ze_device_module_properties_t* props) { if (!handle || !zeDeviceGetModulePropertiesInternal) throw std::runtime_error("ZeWrapper not initialized, call ZeWrapper::init() first."); return zeDeviceGetModulePropertiesInternal(ze_handle, props); } /* EXP version of API */ ze_result_t ZeWrapper::zeRTASBuilderCreateExp(ze_driver_handle_t hDriver, const ze_rtas_builder_exp_desc_t *pDescriptor, ze_rtas_builder_exp_handle_t *phBuilder) { if (!handle || !zeRTASBuilderCreateExpInternal) throw std::runtime_error("ZeWrapper not initialized, call ZeWrapper::init() first."); return zeRTASBuilderCreateExpInternal(hDriver,pDescriptor,phBuilder); } ze_result_t ZeWrapper::zeRTASBuilderDestroyExp(ze_rtas_builder_exp_handle_t hBuilder) { if (!handle || !zeRTASBuilderDestroyExpInternal) throw std::runtime_error("ZeWrapper not initialized, call ZeWrapper::init() first."); return 
zeRTASBuilderDestroyExpInternal(hBuilder); } ze_result_t ZeWrapper::zeDriverRTASFormatCompatibilityCheckExp( ze_driver_handle_t hDriver, const ze_rtas_format_exp_t accelFormat, const ze_rtas_format_exp_t otherAccelFormat) { if (!handle || !zeDriverRTASFormatCompatibilityCheckExpInternal) throw std::runtime_error("ZeWrapper not initialized, call ZeWrapper::init() first."); return zeDriverRTASFormatCompatibilityCheckExpInternal( hDriver, accelFormat, otherAccelFormat); } ze_result_t ZeWrapper::zeRTASBuilderGetBuildPropertiesExp(ze_rtas_builder_exp_handle_t hBuilder, const ze_rtas_builder_build_op_exp_desc_t* args, ze_rtas_builder_exp_properties_t* pProp) { if (!handle || !zeRTASBuilderGetBuildPropertiesExpInternal) throw std::runtime_error("ZeWrapper not initialized, call ZeWrapper::init() first."); return zeRTASBuilderGetBuildPropertiesExpInternal(hBuilder, args, pProp); } ze_result_t ZeWrapper::zeRTASBuilderBuildExp(ze_rtas_builder_exp_handle_t hBuilder, const ze_rtas_builder_build_op_exp_desc_t* args, void *pScratchBuffer, size_t scratchBufferSizeBytes, void *pRtasBuffer, size_t rtasBufferSizeBytes, ze_rtas_parallel_operation_exp_handle_t hParallelOperation, void *pBuildUserPtr, ze_rtas_aabb_exp_t *pBounds, size_t *pRtasBufferSizeBytes) { if (!handle || !zeRTASBuilderBuildExpInternal) throw std::runtime_error("ZeWrapper not initialized, call ZeWrapper::init() first."); return zeRTASBuilderBuildExpInternal(hBuilder, args, pScratchBuffer, scratchBufferSizeBytes, pRtasBuffer, rtasBufferSizeBytes, hParallelOperation, pBuildUserPtr, pBounds, pRtasBufferSizeBytes); } ze_result_t ZeWrapper::zeRTASBuilderCommandListAppendCopyExp(ze_command_list_handle_t hCommandList, void* dstptr, const void* srcptr, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t* phWaitEvents) { if (!handle || !zeCommandListAppendMemoryCopyInternal) throw std::runtime_error("ZeWrapper not initialized, call ZeWrapper::init() first."); return 
zeCommandListAppendMemoryCopyInternal(hCommandList,dstptr,srcptr,size,hSignalEvent,numWaitEvents,phWaitEvents); // EXP API does not have proper copy function } ze_result_t ZeWrapper::zeCommandQueueExecuteCommandLists(ze_command_queue_handle_t hCommandQueue, uint32_t numCommandLists, ze_command_list_handle_t* phCommandLists, ze_fence_handle_t hFence) { if (!handle || !zeCommandQueueExecuteCommandListsInternal) throw std::runtime_error("ZeWrapper not initialized, call ZeWrapper::init() first."); return zeCommandQueueExecuteCommandListsInternal(hCommandQueue, numCommandLists, phCommandLists, hFence); } ze_result_t ZeWrapper::zeCommandListCreate(ze_context_handle_t hContext, ze_device_handle_t hDevice, const ze_command_list_desc_t* desc, ze_command_list_handle_t* phCommandList) { if (!handle || !zeCommandListCreateInternal) throw std::runtime_error("ZeWrapper not initialized, call ZeWrapper::init() first."); return zeCommandListCreateInternal(hContext, hDevice, desc, phCommandList); } ze_result_t ZeWrapper::zeCommandListClose(ze_command_list_handle_t hCommandList) { if (!handle || !zeCommandListCloseInternal) throw std::runtime_error("ZeWrapper not initialized, call ZeWrapper::init() first."); return zeCommandListCloseInternal(hCommandList); } ze_result_t ZeWrapper::zeCommandListDestroy(ze_command_list_handle_t hCommandList) { if (!handle || !zeCommandListDestroyInternal) throw std::runtime_error("ZeWrapper not initialized, call ZeWrapper::init() first."); return zeCommandListDestroyInternal(hCommandList); } ze_result_t ZeWrapper::zeRTASParallelOperationCreateExp(ze_driver_handle_t hDriver, ze_rtas_parallel_operation_exp_handle_t* phParallelOperation) { if (!handle || !zeRTASParallelOperationCreateExpInternal) throw std::runtime_error("ZeWrapper not initialized, call ZeWrapper::init() first."); return zeRTASParallelOperationCreateExpInternal(hDriver, phParallelOperation); } ze_result_t ZeWrapper::zeRTASParallelOperationDestroyExp( 
ze_rtas_parallel_operation_exp_handle_t hParallelOperation ) { if (!handle || !zeRTASParallelOperationDestroyExpInternal) throw std::runtime_error("ZeWrapper not initialized, call ZeWrapper::init() first."); return zeRTASParallelOperationDestroyExpInternal( hParallelOperation ); }; ze_result_t ZeWrapper::zeRTASParallelOperationGetPropertiesExp( ze_rtas_parallel_operation_exp_handle_t hParallelOperation, ze_rtas_parallel_operation_exp_properties_t* pProperties ) { if (!handle || !zeRTASParallelOperationGetPropertiesExpInternal) throw std::runtime_error("ZeWrapper not initialized, call ZeWrapper::init() first."); return zeRTASParallelOperationGetPropertiesExpInternal( hParallelOperation, pProperties ); } ze_result_t ZeWrapper::zeRTASParallelOperationJoinExp( ze_rtas_parallel_operation_exp_handle_t hParallelOperation) { if (!handle || !zeRTASParallelOperationJoinExpInternal) throw std::runtime_error("ZeWrapper not initialized, call ZeWrapper::init() first."); return zeRTASParallelOperationJoinExpInternal(hParallelOperation); } /* EXT version of API */ ze_result_t ZeWrapper::zeRTASBuilderCreateExt(ze_driver_handle_t hDriver, const ze_rtas_builder_ext_desc_t *pDescriptor, ze_rtas_builder_ext_handle_t *phBuilder) { if (!handle || !zeRTASBuilderCreateExtInternal) throw std::runtime_error("ZeWrapper not initialized, call ZeWrapper::init() first."); return zeRTASBuilderCreateExtInternal(hDriver,pDescriptor,phBuilder); } ze_result_t ZeWrapper::zeRTASBuilderDestroyExt(ze_rtas_builder_ext_handle_t hBuilder) { if (!handle || !zeRTASBuilderDestroyExtInternal) throw std::runtime_error("ZeWrapper not initialized, call ZeWrapper::init() first."); return zeRTASBuilderDestroyExtInternal(hBuilder); } ze_result_t ZeWrapper::zeDriverRTASFormatCompatibilityCheckExt( ze_driver_handle_t hDriver, const ze_rtas_format_ext_t accelFormat, const ze_rtas_format_ext_t otherAccelFormat) { if (!handle || !zeDriverRTASFormatCompatibilityCheckExtInternal) throw std::runtime_error("ZeWrapper not 
initialized, call ZeWrapper::init() first."); return zeDriverRTASFormatCompatibilityCheckExtInternal( hDriver, accelFormat, otherAccelFormat); } ze_result_t ZeWrapper::zeRTASBuilderGetBuildPropertiesExt(ze_rtas_builder_ext_handle_t hBuilder, const ze_rtas_builder_build_op_ext_desc_t* args, ze_rtas_builder_ext_properties_t* pProp) { if (!handle || !zeRTASBuilderGetBuildPropertiesExtInternal) throw std::runtime_error("ZeWrapper not initialized, call ZeWrapper::init() first."); return zeRTASBuilderGetBuildPropertiesExtInternal(hBuilder, args, pProp); } ze_result_t ZeWrapper::zeRTASBuilderBuildExt(ze_rtas_builder_ext_handle_t hBuilder, const ze_rtas_builder_build_op_ext_desc_t* args, void *pScratchBuffer, size_t scratchBufferSizeBytes, void *pRtasBuffer, size_t rtasBufferSizeBytes, ze_rtas_parallel_operation_ext_handle_t hParallelOperation, void *pBuildUserPtr, ze_rtas_aabb_ext_t *pBounds, size_t *pRtasBufferSizeBytes) { if (!handle || !zeRTASBuilderBuildExtInternal) throw std::runtime_error("ZeWrapper not initialized, call ZeWrapper::init() first."); return zeRTASBuilderBuildExtInternal(hBuilder, args, pScratchBuffer, scratchBufferSizeBytes, pRtasBuffer, rtasBufferSizeBytes, hParallelOperation, pBuildUserPtr, pBounds, pRtasBufferSizeBytes); } ze_result_t ZeWrapper::zeRTASBuilderCommandListAppendCopyExt(ze_command_list_handle_t hCommandList, void* dstptr, const void* srcptr, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t* phWaitEvents) { if (!handle || !zeRTASBuilderCommandListAppendCopyExtInternal) throw std::runtime_error("ZeWrapper not initialized, call ZeWrapper::init() first."); return zeRTASBuilderCommandListAppendCopyExtInternal(hCommandList,dstptr,srcptr,size,hSignalEvent,numWaitEvents,phWaitEvents); } ze_result_t ZeWrapper::zeRTASParallelOperationCreateExt(ze_driver_handle_t hDriver, ze_rtas_parallel_operation_ext_handle_t* phParallelOperation) { if (!handle || !zeRTASParallelOperationCreateExtInternal) throw 
std::runtime_error("ZeWrapper not initialized, call ZeWrapper::init() first."); return zeRTASParallelOperationCreateExtInternal(hDriver, phParallelOperation); } ze_result_t ZeWrapper::zeRTASParallelOperationDestroyExt( ze_rtas_parallel_operation_ext_handle_t hParallelOperation ) { if (!handle || !zeRTASParallelOperationDestroyExtInternal) throw std::runtime_error("ZeWrapper not initialized, call ZeWrapper::init() first."); return zeRTASParallelOperationDestroyExtInternal( hParallelOperation ); }; ze_result_t ZeWrapper::zeRTASParallelOperationGetPropertiesExt( ze_rtas_parallel_operation_ext_handle_t hParallelOperation, ze_rtas_parallel_operation_ext_properties_t* pProperties ) { if (!handle || !zeRTASParallelOperationGetPropertiesExtInternal) throw std::runtime_error("ZeWrapper not initialized, call ZeWrapper::init() first."); return zeRTASParallelOperationGetPropertiesExtInternal( hParallelOperation, pProperties ); } ze_result_t ZeWrapper::zeRTASParallelOperationJoinExt( ze_rtas_parallel_operation_ext_handle_t hParallelOperation) { if (!handle || !zeRTASParallelOperationJoinExtInternal) throw std::runtime_error("ZeWrapper not initialized, call ZeWrapper::init() first."); return zeRTASParallelOperationJoinExtInternal(hParallelOperation); } level-zero-raytracing-support-1.2.3/level_zero/ze_wrapper.h000066400000000000000000000170621514453371700241710ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "ze_api.h" /* API Type */ enum API_TY { EXT_API = 0, EXP_API = 1 }; ////////////////////// // Debug extension #define ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_DEBUG_DESC ((ze_structure_type_t)0x00020020) ///< ::ze_rtas_builder_build_op_debug_desc_t typedef struct _ze_rtas_builder_build_op_debug_desc_t { ze_structure_type_t stype; ///< [in] type of this structure const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. 
contains stype and pNext). void* dispatchGlobalsPtr; } ze_rtas_builder_build_op_debug_desc_t; //////////////////// struct ZeWrapper { enum RTAS_BUILD_MODE { INVALID = 0, INTERNAL = 1, // use internal RTAS build implementation LEVEL_ZERO = 2, // use Level Zero provided RTAS build implementation }; ~ZeWrapper(); static ze_result_t init(); static ze_result_t initRTASBuilder(API_TY aty, ze_driver_handle_t hDriver, RTAS_BUILD_MODE rtas_build_mode); static ze_result_t zeMemFree(ze_context_handle_t, void*); static ze_result_t zeMemAllocShared(ze_context_handle_t, const ze_device_mem_alloc_desc_t*, const ze_host_mem_alloc_desc_t*, size_t, size_t, ze_device_handle_t, void**); static ze_result_t zeMemAllocDevice(ze_context_handle_t, const ze_device_mem_alloc_desc_t*, size_t, size_t, ze_device_handle_t, void**); static ze_result_t zeDriverGetExtensionProperties(ze_driver_handle_t, uint32_t*, ze_driver_extension_properties_t*); static ze_result_t zeDeviceGetProperties(ze_device_handle_t, ze_device_properties_t*); static ze_result_t zeDeviceGetModuleProperties(ze_device_handle_t, ze_device_module_properties_t*); static ze_result_t zeCommandQueueExecuteCommandLists(ze_command_queue_handle_t hCommandQueue, uint32_t numCommandLists, ze_command_list_handle_t* phCommandLists, ze_fence_handle_t hFence); static ze_result_t zeCommandListCreate(ze_context_handle_t hContext, ze_device_handle_t hDevice, const ze_command_list_desc_t* desc, ze_command_list_handle_t* phCommandList); static ze_result_t zeCommandListClose(ze_command_list_handle_t hCommandList); static ze_result_t zeCommandListDestroy(ze_command_list_handle_t hCommandList); /* EXP version of API */ static ze_result_t zeRTASBuilderCreateExp(ze_driver_handle_t hDriver, const ze_rtas_builder_exp_desc_t *pDescriptor, ze_rtas_builder_exp_handle_t *phBuilder); static ze_result_t zeRTASBuilderDestroyExp(ze_rtas_builder_exp_handle_t hBuilder); static ze_result_t zeDriverRTASFormatCompatibilityCheckExp( ze_driver_handle_t hDriver, const 
ze_rtas_format_exp_t accelFormat, const ze_rtas_format_exp_t otherAccelFormat); static ze_result_t zeRTASBuilderGetBuildPropertiesExp(ze_rtas_builder_exp_handle_t hBuilder, const ze_rtas_builder_build_op_exp_desc_t* args, ze_rtas_builder_exp_properties_t* pProp); static ze_result_t zeRTASBuilderBuildExp(ze_rtas_builder_exp_handle_t hBuilder, const ze_rtas_builder_build_op_exp_desc_t* args, void *pScratchBuffer, size_t scratchBufferSizeBytes, void *pRtasBuffer, size_t rtasBufferSizeBytes, ze_rtas_parallel_operation_exp_handle_t hParallelOperation, void *pBuildUserPtr, ze_rtas_aabb_exp_t *pBounds, size_t *pRtasBufferSizeBytes); static ze_result_t zeRTASBuilderCommandListAppendCopyExp(ze_command_list_handle_t hCommandList, void* dstptr, const void* srcptr, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t* phWaitEvents); static ze_result_t zeRTASParallelOperationCreateExp(ze_driver_handle_t hDriver, ze_rtas_parallel_operation_exp_handle_t* phParallelOperation); static ze_result_t zeRTASParallelOperationDestroyExp( ze_rtas_parallel_operation_exp_handle_t hParallelOperation ); static ze_result_t zeRTASParallelOperationGetPropertiesExp( ze_rtas_parallel_operation_exp_handle_t hParallelOperation, ze_rtas_parallel_operation_exp_properties_t* pProperties ); static ze_result_t zeRTASParallelOperationJoinExp( ze_rtas_parallel_operation_exp_handle_t hParallelOperation); /* EXT version of API */ static ze_result_t zeRTASBuilderCreateExt(ze_driver_handle_t hDriver, const ze_rtas_builder_ext_desc_t *pDescriptor, ze_rtas_builder_ext_handle_t *phBuilder); static ze_result_t zeRTASBuilderDestroyExt(ze_rtas_builder_ext_handle_t hBuilder); static ze_result_t zeDriverRTASFormatCompatibilityCheckExt( ze_driver_handle_t hDriver, const ze_rtas_format_ext_t accelFormat, const ze_rtas_format_ext_t otherAccelFormat); static ze_result_t zeRTASBuilderGetBuildPropertiesExt(ze_rtas_builder_ext_handle_t hBuilder, const ze_rtas_builder_build_op_ext_desc_t* 
args, ze_rtas_builder_ext_properties_t* pProp); static ze_result_t zeRTASBuilderBuildExt(ze_rtas_builder_ext_handle_t hBuilder, const ze_rtas_builder_build_op_ext_desc_t* args, void *pScratchBuffer, size_t scratchBufferSizeBytes, void *pRtasBuffer, size_t rtasBufferSizeBytes, ze_rtas_parallel_operation_ext_handle_t hParallelOperation, void *pBuildUserPtr, ze_rtas_aabb_ext_t *pBounds, size_t *pRtasBufferSizeBytes); static ze_result_t zeRTASBuilderCommandListAppendCopyExt(ze_command_list_handle_t hCommandList, void* dstptr, const void* srcptr, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t* phWaitEvents); static ze_result_t zeRTASParallelOperationCreateExt(ze_driver_handle_t hDriver, ze_rtas_parallel_operation_ext_handle_t* phParallelOperation); static ze_result_t zeRTASParallelOperationDestroyExt( ze_rtas_parallel_operation_ext_handle_t hParallelOperation ); static ze_result_t zeRTASParallelOperationGetPropertiesExt( ze_rtas_parallel_operation_ext_handle_t hParallelOperation, ze_rtas_parallel_operation_ext_properties_t* pProperties ); static ze_result_t zeRTASParallelOperationJoinExt( ze_rtas_parallel_operation_ext_handle_t hParallelOperation); static RTAS_BUILD_MODE rtas_builder; }; level-zero-raytracing-support-1.2.3/level_zero_raytracing.rc.in000066400000000000000000000025211514453371700250120ustar00rootroot00000000000000#define VER_FILEVERSION @ZE_RAYTRACING_VERSION_MAJOR@,@ZE_RAYTRACING_VERSION_MINOR@,@ZE_RAYTRACING_VERSION_PATCH@ #define VER_FILEVERSION_STR "@ZE_RAYTRACING_VERSION_MAJOR@.@ZE_RAYTRACING_VERSION_MINOR@.@ZE_RAYTRACING_VERSION_PATCH@" #define VER_PRODUCTVERSION @ZE_RAYTRACING_VERSION_MAJOR@,@ZE_RAYTRACING_VERSION_MINOR@,@ZE_RAYTRACING_VERSION_PATCH@ #define VER_PRODUCTVERSION_STR "@ZE_RAYTRACING_VERSION_MAJOR@.@ZE_RAYTRACING_VERSION_MINOR@.@ZE_RAYTRACING_VERSION_PATCH@" #define VER_FILEDESCRIPTION_STR "oneAPI Level Zero Ray Tracing Support for Windows(R) Level Zero Drivers" #define VER_PRODUCT_NAME_STR 
"oneAPI Level Zero Ray Tracing Support for Windows(R)" #define VER_LEGALCOPYRIGHT_STR "Copyright (C) 2023 Intel Corporation" 1 VERSIONINFO FILEVERSION VER_FILEVERSION PRODUCTVERSION VER_PRODUCTVERSION BEGIN BLOCK "StringFileInfo" BEGIN BLOCK "040904E4" BEGIN VALUE "FileDescription", VER_FILEDESCRIPTION_STR VALUE "FileVersion", VER_FILEVERSION_STR VALUE "ProductVersion", VER_PRODUCTVERSION_STR VALUE "ProductName", VER_PRODUCT_NAME_STR VALUE "LegalCopyright", VER_LEGALCOPYRIGHT_STR END END BLOCK "VarFileInfo" BEGIN VALUE "Translation", 0x409, 1252 END END level-zero-raytracing-support-1.2.3/packaging/000077500000000000000000000000001514453371700214125ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/packaging/debian/000077500000000000000000000000001514453371700226345ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/packaging/debian/changelog000066400000000000000000000061141514453371700245100ustar00rootroot00000000000000 intel-level-zero-gpu-raytracing (1.2.3) UNRELEASED; urgency=medium * Updating default TBB version for static TBB build to v2022.3.0. * Fixed potential global variable initialization order issue when linking TBB statically. * Moved packaging files to separate packaging folder. -- Sven Woop Mon Feb 16 07:08:50 AM CET 2026 intel-level-zero-gpu-raytracing (1.2.2) UNRELEASED; urgency=medium * Added headers for TBB 2021.6.0 and avoiding fetching headers of that TBB version. -- Sven Woop Tue Oct 28 09:41:11 AM CET 2025 intel-level-zero-gpu-raytracing (1.2.1) UNRELEASED; urgency=medium * Added support to specify TBB headers to use for compilation -- Sven Woop Thu Oct 23 09:41:00 AM CEST 2025 intel-level-zero-gpu-raytracing (1.2.0) UNRELEASED; urgency=medium * Added support for Level Zero Extension ZE_extension_rtas * Updated to Level Zero API header 1.13.1 * Fixed wrong assertion that triggered when using device memory for RTAS. * Fixed compile issues when AVX was enabled. 
-- Sven Woop Fri, 05 Sep 2025 10:38:00 +0200 intel-level-zero-gpu-raytracing (1.1.0) oracular; urgency=medium [ Sven Woop ] * Added support for PTL RTAS layout. [ Shane McKee ] * Change to oracular for Intel Graphics PPA * Blended in changes from Pavel's PR#7 during conflict resolution -- Shane McKee Fri, 02 May 2025 20:12:54 +0400 intel-level-zero-gpu-raytracing (1.0.0-0ubuntu1~24.10~ppa5) oracular; urgency=medium * Change package name to fit soname * Create noble version -- Shane McKee Wed, 29 Jan 2025 11:15:53 +0800 intel-level-zero-gpu-raytracing (1.0.0-0ubuntu1~24.10~ppa4) oracular; urgency=medium * Remove pkg compression override and drop 3rd party docs -- Pavel Androniychuk Wed, 23 Oct 2024 16:51:24 -0700 intel-level-zero-gpu-raytracing (1.0.0-0ubuntu1~24.10~ppa3) oracular; urgency=medium * Change dependency to pull libtbb-dev from archive -- piandro-mobl2 Mon, 30 Sep 2024 08:41:42 -0700 intel-level-zero-gpu-raytracing (1.0.0) UNRELEASED; urgency=medium * Sync debian folder from upstream * Correct build depends for libtbb12 -- Pavel Androniychuk Thu, 10 Oct 2024 13:28:32 -0700 intel-level-zero-gpu-raytracing (1.0.0-0ubuntu1~24.10~ppa2) oracular; urgency=medium * Corrected the name of a dependency on libze1 -- Shane McKee Thu, 12 Sep 2024 21:26:10 +0800 intel-level-zero-gpu-raytracing (1.0.0-0ubuntu1~24.10~ppa1) oracular; urgency=medium * Update the version scheme for multiple different versions in the PPA * Changed to oracular * Changed to build without TBB static build (created a separate package for that) -- Shane McKee Thu, 05 Sep 2024 17:50:23 +0800 intel-level-zero-gpu-raytracing (1.0.0) experimental; urgency=medium * First Release -- Pavel Androniychuk Fri, 21 Apr 2023 15:46:02 -0800 level-zero-raytracing-support-1.2.3/packaging/debian/compat000066400000000000000000000000031514453371700240330ustar00rootroot0000000000000011 
level-zero-raytracing-support-1.2.3/packaging/debian/control000066400000000000000000000020221514453371700242330ustar00rootroot00000000000000Source: intel-level-zero-gpu-raytracing Section: libs Priority: optional Maintainer: Intel Graphics Team Build-Depends: debhelper (>= 11), cmake, ninja-build, pkg-config, make, gcc, git, libpthread-stubs0-dev, libtbb-dev Standards-Version: 4.3.0 Homepage: https://github.com/intel/level-zero-raytracing-support Package: libze-intel-gpu-raytracing Replaces: intel-level-zero-gpu-raytracing Conflicts: intel-level-zero-gpu-raytracing Architecture: amd64 Depends: ${misc:Depends}, ${shlibs:Depends}, level-zero (>=1.17.44) | libze1 (>=1.21.1) Description: oneAPI Level Zero Ray Tracing Support library A library that provides high-performance CPU-based construction algorithms for 3D acceleration structures, designed to work with the ray tracing hardware of Intel GPUs. Intel(R) oneAPI Level Zero uses this library to implement the Ray Tracing Acceleration Structures (RTAS) builder extension. The library is intended for use exclusively through Level Zero and is not meant to be accessed directly. level-zero-raytracing-support-1.2.3/packaging/debian/copyright000066400000000000000000000016141514453371700245710ustar00rootroot00000000000000Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ Upstream-Name: level-zero-raytracing Source: https://github.com/intel/level-zero-raytracing-support Files: * Copyright: 2024 Intel Corporation License: Apache-2.0 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at . http://www.apache.org/licenses/LICENSE-2.0 . Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. Comment: On Debian systems, the full text of the Apache-2.0 license can be found in the file '/usr/share/common-licenses/Apache-2.0'. level-zero-raytracing-support-1.2.3/packaging/debian/intel-level-zero-gpu-raytracing.docs000066400000000000000000000000311514453371700316270ustar00rootroot00000000000000third-party-programs.txt intel-level-zero-gpu-raytracing.lintian-overrides000066400000000000000000000001541514453371700342640ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/packaging/debianshared-library-lacks-version no-symbols-control-file usr/lib/x86_64-linux-gnu/libze_intel_gpu_raytracing.so level-zero-raytracing-support-1.2.3/packaging/debian/rules000077500000000000000000000004761514453371700237230ustar00rootroot00000000000000#!/usr/bin/make -f %: dh $@ --builddir build --buildsystem=cmake+ninja override_dh_auto_configure: dh_auto_configure -- \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=/usr \ -DZE_RAYTRACING_TBB=inject_headers override_dh_auto_clean: dh_auto_clean rm -rf build/ rm level_zero_raytracing.rc || true level-zero-raytracing-support-1.2.3/packaging/debian/source/000077500000000000000000000000001514453371700241345ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/packaging/debian/source/format000066400000000000000000000000151514453371700253430ustar00rootroot000000000000003.0 (native) level-zero-raytracing-support-1.2.3/packaging/rhel/000077500000000000000000000000001514453371700223445ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/packaging/rhel/intel-level-zero-gpu-raytracing.spec000066400000000000000000000043261514453371700313540ustar00rootroot00000000000000#it's changed by external script %global ver 1.2.3 %global rel 1 Name: intel-level-zero-gpu-raytracing Version: %{ver} Release: %{rel}%{?dist} Summary: oneAPI Level Zero Ray Tracing Support Group: System Environment/Libraries License: Apache2 
URL: https://github.com/oneapi-src/level-zero-raytracing Source0: %{url}/archive/%{ver}/intel-level-zero-gpu-raytracing-%{ver}.tar.gz BuildRequires: make gcc-c++ cmake ninja-build git pkg-config tbb-devel Requires: tbb %description A library that provides high-performance CPU-based construction algorithms for 3D acceleration structures, designed to work with the ray tracing hardware of Intel GPUs. Intel(R) oneAPI Level Zero uses this library to implement the Ray Tracing Acceleration Structures (RTAS) builder extension. The library is intended for use exclusively through Level Zero and is not meant to be accessed directly. %prep %autosetup -p1 -n %{name}-%{ver} %build mkdir build cd build %cmake .. \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=/usr \ -DZE_RAYTRACING_TBB=inject_headers %make_build %install cd build %make_install %files %defattr(-,root,root) %config(noreplace) %license LICENSE.txt %license third-party-programs* %{_libdir}/libze_intel_gpu_raytracing.so %doc %changelog * Mon Feb 16 2026 Sven Woop - 1.2.3 - Updating default TBB version for static TBB build to v2022.3.0. - Fixed potential global variable initialization order issue when linking TBB statically. - Moved packaging files to separate packaging folder. * Thu Oct 28 2025 Sven Woop - 1.2.2 - Added headers for TBB 2021.6.0 and avoiding fetching headers of that TBB version. * Thu Oct 23 2025 Sven Woop - 1.2.1 - Added support to specify TBB headers to use for compilation * Fri Sep 5 2025 Sven Woop - 1.2.0 - Added support for Level Zero Extension ZE_extension_rtas - Updated to Level Zero API header 1.13.1 - Fixed wrong assertion that triggered when using device memory for RTAS. - Fixed compile issues when AVX was enabled. * Thu Mar 6 2025 Sven Woop - 1.1.0 - Added support for PTL RTAS layout. 
* Thu Jun 8 2023 Pavel Androniychuk - 1.0.0 - Spec file init level-zero-raytracing-support-1.2.3/packaging/sle/000077500000000000000000000000001514453371700221755ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/packaging/sle/intel-level-zero-gpu-raytracing.spec000066400000000000000000000056011514453371700312020ustar00rootroot00000000000000# # spec file for package level-zero-raytracing # # Copyright (c) 2019 SUSE LINUX GmbH, Nuernberg, Germany. # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed # upon. The license for this file, and modifications and additions to the # file, is the same license as for the pristine package itself (unless the # license for the pristine package is not an Open Source License, in which # case the license is the MIT License). An "Open Source License" is a # license that conforms to the Open Source Definition (Version 1.9) # published by the Open Source Initiative. # Please submit bugfixes or comments via https://bugs.opensuse.org/ # #it's changed by external script %global ver 1.2.3 %global rel 1 Name: intel-level-zero-gpu-raytracing Version: %{ver} Release: %{rel}%{?dist} Summary: oneAPI Level Zero Ray Tracing Support Group: System Environment/Libraries License: Apache2 URL: https://github.com/oneapi-src/level-zero-raytracing Source0: %{url}/archive/%{ver}/intel-level-zero-gpu-raytracing-%{ver}.tar.gz BuildRequires: make gcc-c++ cmake git pkg-config %description A library that provides high-performance CPU-based construction algorithms for 3D acceleration structures, designed to work with the ray tracing hardware of Intel GPUs. Intel(R) oneAPI Level Zero uses this library to implement the Ray Tracing Acceleration Structures (RTAS) builder extension. The library is intended for use exclusively through Level Zero and is not meant to be accessed directly. %debug %prep %autosetup -p1 -n %{name}-%{ver} %build %cmake .. 
\ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=/usr \ -DZE_RAYTRACING_TBB=build_static %make_build %install cd build %make_install %files %defattr(-,root,root) %config(noreplace) %license LICENSE.txt %license third-party-programs* %{_libdir}/libze_intel_gpu_raytracing.so %doc %changelog * Mon Feb 16 2026 Sven Woop - 1.2.3 - Updating default TBB version for static TBB build to v2022.3.0. - Fixed potential global variable initialization order issue when linking TBB statically. - Moved packaging files to separate packaging folder. * Thu Oct 28 2025 Sven Woop - 1.2.2 - Added headers for TBB 2021.6.0 and avoiding fetching headers of that TBB version. * Thu Oct 23 2025 Sven Woop - 1.2.1 - Added support to specify TBB headers to use for compilation * Fri Sep 5 2025 Sven Woop - 1.2.0 - Added support for Level Zero Extension ZE_extension_rtas - Updated to Level Zero API header 1.13.1 - Fixed wrong assertion that triggered when using device memory for RTAS. - Fixed compile issues when AVX was enabled. * Thu Mar 6 2025 Sven Woop - 1.1.0 - Added support for PTL RTAS layout. 
* Thu Jun 8 2023 Pavel Androniychuk - 1.0.0 - Spec file init level-zero-raytracing-support-1.2.3/rtbuild/000077500000000000000000000000001514453371700211335ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/rtbuild/CMakeLists.txt000066400000000000000000000021511514453371700236720ustar00rootroot00000000000000## Copyright 2009-2021 Intel Corporation ## SPDX-License-Identifier: Apache-2.0 ADD_LIBRARY(embree_rthwif SHARED rtbuild.cpp qbvh6.cpp statistics.cpp ../level_zero_raytracing.rc) TARGET_LINK_LIBRARIES(embree_rthwif PUBLIC ${EMBREE_RTHWIF_SYCL} PRIVATE tbb simd sys) SET_TARGET_PROPERTIES(embree_rthwif PROPERTIES OUTPUT_NAME ze_intel_gpu_raytracing) TARGET_COMPILE_DEFINITIONS(embree_rthwif PRIVATE ZE_RAYTRACING) TARGET_INCLUDE_DIRECTORIES(embree_rthwif PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/..") IF (WIN32) ELSE() SET_TARGET_PROPERTIES(embree_rthwif PROPERTIES LINK_FLAGS -Wl,--version-script="${CMAKE_CURRENT_SOURCE_DIR}/export.linux.map") SET_SOURCE_FILES_PROPERTIES(rtbuild.cpp PROPERTIES OBJECT_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/export.linux.map") ENDIF() INSTALL(TARGETS embree_rthwif EXPORT ze_raytracing-targets LIBRARY NAMELINK_SKIP DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT devel) #INSTALL(EXPORT ze_raytracing-targets DESTINATION "${EMBREE_CMAKEEXPORT_DIR}" COMPONENT devel) level-zero-raytracing-support-1.2.3/rtbuild/algorithms/000077500000000000000000000000001514453371700233045ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/rtbuild/algorithms/parallel_for.h000066400000000000000000000104301514453371700261150ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "../sys/array.h" #include "../math/emath.h" #include "../math/range.h" // We need to define these to avoid implicit linkage against // tbb_debug.lib under Windows. 
When removing these lines debug build // under Windows fails. #define __TBB_NO_IMPLICIT_LINKAGE 1 #define __TBBMALLOC_NO_IMPLICIT_LINKAGE 1 #define TBB_SUPPRESS_DEPRECATED_MESSAGES 1 #define TBB_PREVIEW_ISOLATED_TASK_GROUP 1 #include "tbb/tbb.h" namespace embree { struct TaskScheduler { /* returns the total number of threads */ static __forceinline size_t threadCount() { #if TBB_INTERFACE_VERSION >= 9100 return tbb::this_task_arena::max_concurrency(); #else return tbb::task_scheduler_init::default_num_threads(); #endif } }; /* parallel_for without range */ template __forceinline void parallel_for( const Index N, const Func& func) { #if TBB_INTERFACE_VERSION >= 12002 tbb::task_group_context context; tbb::parallel_for(Index(0),N,Index(1),[&](Index i) { func(i); },context); if (context.is_group_execution_cancelled()) throw std::runtime_error("task cancelled"); #else tbb::parallel_for(Index(0),N,Index(1),[&](Index i) { func(i); }); if (tbb::task::self().is_cancelled()) throw std::runtime_error("task cancelled"); #endif } /* parallel for with range and granulatity */ template __forceinline void parallel_for( const Index first, const Index last, const Index minStepSize, const Func& func) { assert(first <= last); #if TBB_INTERFACE_VERSION >= 12002 tbb::task_group_context context; tbb::parallel_for(tbb::blocked_range(first,last,minStepSize),[&](const tbb::blocked_range& r) { func(range(r.begin(),r.end())); },context); if (context.is_group_execution_cancelled()) throw std::runtime_error("task cancelled"); #else tbb::parallel_for(tbb::blocked_range(first,last,minStepSize),[&](const tbb::blocked_range& r) { func(range(r.begin(),r.end())); }); if (tbb::task::self().is_cancelled()) throw std::runtime_error("task cancelled"); #endif } /* parallel for with range */ template __forceinline void parallel_for( const Index first, const Index last, const Func& func) { assert(first <= last); parallel_for(first,last,(Index)1,func); } #if (TBB_INTERFACE_VERSION > 4001) template 
__forceinline void parallel_for_static( const Index N, const Func& func) { #if TBB_INTERFACE_VERSION >= 12002 tbb::task_group_context context; tbb::parallel_for(Index(0),N,Index(1),[&](Index i) { func(i); },tbb::simple_partitioner(),context); if (context.is_group_execution_cancelled()) throw std::runtime_error("task cancelled"); #else tbb::parallel_for(Index(0),N,Index(1),[&](Index i) { func(i); },tbb::simple_partitioner()); if (tbb::task::self().is_cancelled()) throw std::runtime_error("task cancelled"); #endif } typedef tbb::affinity_partitioner affinity_partitioner; template __forceinline void parallel_for_affinity( const Index N, const Func& func, tbb::affinity_partitioner& ap) { #if TBB_INTERFACE_VERSION >= 12002 tbb::task_group_context context; tbb::parallel_for(Index(0),N,Index(1),[&](Index i) { func(i); },ap,context); if (context.is_group_execution_cancelled()) throw std::runtime_error("task cancelled"); #else tbb::parallel_for(Index(0),N,Index(1),[&](Index i) { func(i); },ap); if (tbb::task::self().is_cancelled()) throw std::runtime_error("task cancelled"); #endif } #else template __forceinline void parallel_for_static( const Index N, const Func& func) { parallel_for(N,func); } struct affinity_partitioner { }; template __forceinline void parallel_for_affinity( const Index N, const Func& func, affinity_partitioner& ap) { parallel_for(N,func); } #endif } level-zero-raytracing-support-1.2.3/rtbuild/algorithms/parallel_for_for.h000066400000000000000000000115671514453371700267770ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "parallel_for.h" namespace embree { template __forceinline void sequential_for_for( ArrayArray& array2, const size_t minStepSize, const Func& func ) { size_t k=0; for (size_t i=0; i!=array2.size(); ++i) { const size_t N = array2[i]->size(); if (N) func(array2[i],range(0,N),k); k+=N; } } class ParallelForForState { public: enum { MAX_TASKS = 64 }; 
__forceinline ParallelForForState () : taskCount(0) {} template __forceinline ParallelForForState (ArrayArray& array2, const size_t minStepSize) { init(array2,minStepSize); } template __forceinline ParallelForForState (const size_t numArrays, const SizeFunc& getSize, const size_t minStepSize) { init(numArrays,getSize,minStepSize); } template __forceinline void init ( const size_t numArrays, const SizeFunc& getSize, const size_t minStepSize ) { /* first calculate total number of elements */ size_t N = 0; for (size_t i=0; iN = N; /* calculate number of tasks to use */ const size_t numThreads = TaskScheduler::threadCount(); const size_t numBlocks = (N+minStepSize-1)/minStepSize; taskCount = max(size_t(1),min(numThreads,numBlocks,size_t(ParallelForForState::MAX_TASKS))); /* calculate start (i,j) for each task */ size_t taskIndex = 0; i0[taskIndex] = 0; j0[taskIndex] = 0; size_t k0 = (++taskIndex)*N/taskCount; for (size_t i=0, k=0; taskIndex < taskCount; i++) { assert(i= k0 && taskIndex < taskCount) { assert(taskIndex __forceinline void init ( ArrayArray& array2, const size_t minStepSize ) { init(array2.size(),[&](size_t i) { return array2[i] ? 
array2[i]->size() : 0; },minStepSize); } __forceinline size_t size() const { return N; } public: size_t i0[MAX_TASKS]; size_t j0[MAX_TASKS]; size_t taskCount; size_t N; }; template __forceinline void parallel_for_for( ArrayArray& array2, const size_t minStepSize, const Func& func ) { ParallelForForState state(array2,minStepSize); parallel_for(state.taskCount, [&](const size_t taskIndex) { /* calculate range */ const size_t k0 = (taskIndex+0)*state.size()/state.taskCount; const size_t k1 = (taskIndex+1)*state.size()/state.taskCount; size_t i0 = state.i0[taskIndex]; size_t j0 = state.j0[taskIndex]; /* iterate over arrays */ size_t k=k0; for (size_t i=i0; ksize() : 0; const size_t r0 = j0, r1 = min(N,r0+k1-k); if (r1 > r0) func(array2[i],range(r0,r1),k); k+=r1-r0; j0 = 0; } }); } template __forceinline void parallel_for_for( ArrayArray& array2, const Func& func ) { parallel_for_for(array2,1,func); } template __forceinline Value parallel_for_for_reduce( ArrayArray& array2, const size_t minStepSize, const Value& identity, const Func& func, const Reduction& reduction ) { ParallelForForState state(array2,minStepSize); Value temp[ParallelForForState::MAX_TASKS]; for (size_t i=0; isize() : 0; const size_t r0 = j0, r1 = min(N,r0+k1-k); if (r1 > r0) temp[taskIndex] = reduction(temp[taskIndex],func(array2[i],range(r0,r1),k)); k+=r1-r0; j0 = 0; } }); Value ret = identity; for (size_t i=0; i __forceinline Value parallel_for_for_reduce( ArrayArray& array2, const Value& identity, const Func& func, const Reduction& reduction) { return parallel_for_for_reduce(array2,1,identity,func,reduction); } } level-zero-raytracing-support-1.2.3/rtbuild/algorithms/parallel_for_for_prefix_sum.h000066400000000000000000000137421514453371700312350ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "parallel_for_for.h" #include "parallel_prefix_sum.h" namespace embree { template struct ParallelForForPrefixSumState : public 
ParallelForForState { __forceinline ParallelForForPrefixSumState () {} template __forceinline ParallelForForPrefixSumState (ArrayArray& array2, const size_t minStepSize) : ParallelForForState(array2,minStepSize) {} template __forceinline ParallelForForPrefixSumState (size_t numArrays, const SizeFunc& getSize, const size_t minStepSize) : ParallelForForState(numArrays,getSize,minStepSize) {} ParallelPrefixSumState prefix_state; }; template __forceinline Value parallel_for_for_prefix_sum0_( ParallelForForPrefixSumState& state, Index minStepSize, const SizeFunc& getSize, const Value& identity, const Func& func, const Reduction& reduction) { /* calculate number of tasks to use */ const size_t taskCount = state.taskCount; /* perform parallel prefix sum */ parallel_for(taskCount, [&](const size_t taskIndex) { const size_t k0 = (taskIndex+0)*state.size()/taskCount; const size_t k1 = (taskIndex+1)*state.size()/taskCount; size_t i0 = state.i0[taskIndex]; size_t j0 = state.j0[taskIndex]; /* iterate over arrays */ size_t k=k0; Value N=identity; for (size_t i=i0; k r0) N = reduction(N, func((Index)i,range((Index)r0,(Index)r1),(Index)k)); k+=r1-r0; j0 = 0; } state.prefix_state.counts[taskIndex] = N; }); /* calculate prefix sum */ Value sum=identity; for (size_t i=0; i __forceinline Value parallel_for_for_prefix_sum1_( ParallelForForPrefixSumState& state, Index minStepSize, const SizeFunc& getSize, const Value& identity, const Func& func, const Reduction& reduction) { /* calculate number of tasks to use */ const size_t taskCount = state.taskCount; /* perform parallel prefix sum */ parallel_for(taskCount, [&](const size_t taskIndex) { const size_t k0 = (taskIndex+0)*state.size()/taskCount; const size_t k1 = (taskIndex+1)*state.size()/taskCount; size_t i0 = state.i0[taskIndex]; size_t j0 = state.j0[taskIndex]; /* iterate over arrays */ size_t k=k0; Value N=identity; for (size_t i=i0; k r0) N = reduction(N, 
func((Index)i,range((Index)r0,(Index)r1),(Index)k,reduction(state.prefix_state.sums[taskIndex],N))); k+=r1-r0; j0 = 0; } state.prefix_state.counts[taskIndex] = N; }); /* calculate prefix sum */ Value sum=identity; for (size_t i=0; i __forceinline Value parallel_for_for_prefix_sum0( ParallelForForPrefixSumState& state, ArrayArray& array2, Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction) { return parallel_for_for_prefix_sum0_(state,minStepSize, [&](Index i) { return array2[i] ? array2[i]->size() : 0; }, identity, [&](Index i, const range& r, Index k) { return func(array2[i], r, k, i); }, reduction); } template __forceinline Value parallel_for_for_prefix_sum1( ParallelForForPrefixSumState& state, ArrayArray& array2, Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction) { return parallel_for_for_prefix_sum1_(state,minStepSize, [&](Index i) { return array2[i] ? array2[i]->size() : 0; }, identity, [&](Index i, const range& r, Index k, const Value& base) { return func(array2[i], r, k, i, base); }, reduction); } template __forceinline Value parallel_for_for_prefix_sum0( ParallelForForPrefixSumState& state, ArrayArray& array2, const Value& identity, const Func& func, const Reduction& reduction) { return parallel_for_for_prefix_sum0(state,array2,size_t(1),identity,func,reduction); } template __forceinline Value parallel_for_for_prefix_sum1( ParallelForForPrefixSumState& state, ArrayArray& array2, const Value& identity, const Func& func, const Reduction& reduction) { return parallel_for_for_prefix_sum1(state,array2,size_t(1),identity,func,reduction); } } level-zero-raytracing-support-1.2.3/rtbuild/algorithms/parallel_partition.h000066400000000000000000000254011514453371700273440ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "parallel_for.h" #include "../math/range.h" namespace embree { /* serial partitioning */ template 
__forceinline size_t serial_partitioning(T* array, const size_t begin, const size_t end, V& leftReduction, V& rightReduction, const IsLeft& is_left, const Reduction_T& reduction_t) { T* l = array + begin; T* r = array + end - 1; while(1) { /* *l < pivot */ while (likely(l <= r && is_left(*l) )) { //prefetchw(l+4); // FIXME: enable? reduction_t(leftReduction,*l); ++l; } /* *r >= pivot) */ while (likely(l <= r && !is_left(*r))) { //prefetchw(r-4); FIXME: enable? reduction_t(rightReduction,*r); --r; } if (r class __aligned(64) parallel_partition_task { ALIGNED_CLASS_(64); private: static const size_t MAX_TASKS = 64; T* array; size_t N; const IsLeft& is_left; const Reduction_T& reduction_t; const Reduction_V& reduction_v; const Vi& identity; size_t numTasks; __aligned(64) size_t counter_start[MAX_TASKS+1]; __aligned(64) size_t counter_left[MAX_TASKS+1]; __aligned(64) range leftMisplacedRanges[MAX_TASKS]; __aligned(64) range rightMisplacedRanges[MAX_TASKS]; __aligned(64) V leftReductions[MAX_TASKS]; __aligned(64) V rightReductions[MAX_TASKS]; public: __forceinline parallel_partition_task(T* array, const size_t N, const Vi& identity, const IsLeft& is_left, const Reduction_T& reduction_t, const Reduction_V& reduction_v, const size_t BLOCK_SIZE) : array(array), N(N), is_left(is_left), reduction_t(reduction_t), reduction_v(reduction_v), identity(identity), numTasks(min((N+BLOCK_SIZE-1)/BLOCK_SIZE,min(TaskScheduler::threadCount(),MAX_TASKS))) {} __forceinline const range* findStartRange(size_t& index, const range* const r, const size_t numRanges) { size_t i = 0; while(index >= (size_t)r[i].size()) { assert(i < numRanges); index -= (size_t)r[i].size(); i++; } return &r[i]; } __forceinline void swapItemsInMisplacedRanges(const size_t numLeftMisplacedRanges, const size_t numRightMisplacedRanges, const size_t startID, const size_t endID) { size_t leftLocalIndex = startID; size_t rightLocalIndex = startID; const range* l_range = 
findStartRange(leftLocalIndex,leftMisplacedRanges,numLeftMisplacedRanges); const range* r_range = findStartRange(rightLocalIndex,rightMisplacedRanges,numRightMisplacedRanges); size_t l_left = l_range->size() - leftLocalIndex; size_t r_left = r_range->size() - rightLocalIndex; T *__restrict__ l = &array[l_range->begin() + leftLocalIndex]; T *__restrict__ r = &array[r_range->begin() + rightLocalIndex]; size_t size = endID - startID; size_t items = min(size,min(l_left,r_left)); while (size) { if (unlikely(l_left == 0)) { l_range++; l_left = l_range->size(); l = &array[l_range->begin()]; items = min(size,min(l_left,r_left)); } if (unlikely(r_left == 0)) { r_range++; r_left = r_range->size(); r = &array[r_range->begin()]; items = min(size,min(l_left,r_left)); } size -= items; l_left -= items; r_left -= items; while(items) { items--; xchg(*l++,*r++); } } } __forceinline size_t partition(V& leftReduction, V& rightReduction) { /* partition the individual ranges for each task */ parallel_for(numTasks,[&] (const size_t taskID) { const size_t startID = (taskID+0)*N/numTasks; const size_t endID = (taskID+1)*N/numTasks; V local_left(identity); V local_right(identity); const size_t mid = serial_partitioning(array,startID,endID,local_left,local_right,is_left,reduction_t); counter_start[taskID] = startID; counter_left [taskID] = mid-startID; leftReductions[taskID] = local_left; rightReductions[taskID] = local_right; }); counter_start[numTasks] = N; counter_left[numTasks] = 0; /* finalize the reductions */ for (size_t i=0; i globalLeft (0,mid); const range globalRight(mid,N); /* calculate all left and right ranges that are on the wrong global side */ size_t numMisplacedRangesLeft = 0; size_t numMisplacedRangesRight = 0; size_t numMisplacedItemsLeft MAYBE_UNUSED = 0; size_t numMisplacedItemsRight MAYBE_UNUSED = 0; for (size_t i=0; i left_range (counter_start[i], counter_start[i] + counter_left[i]); const range right_range(counter_start[i] + counter_left[i], counter_start[i+1]); 
const range left_misplaced = globalLeft. intersect(right_range); const range right_misplaced = globalRight.intersect(left_range); if (!left_misplaced.empty()) { numMisplacedItemsLeft += left_misplaced.size(); leftMisplacedRanges[numMisplacedRangesLeft++] = left_misplaced; } if (!right_misplaced.empty()) { numMisplacedItemsRight += right_misplaced.size(); rightMisplacedRanges[numMisplacedRangesRight++] = right_misplaced; } } assert( numMisplacedItemsLeft == numMisplacedItemsRight ); /* if no items are misplaced we are done */ if (numMisplacedItemsLeft == 0) return mid; /* otherwise we copy the items to the right place in parallel */ parallel_for(numTasks,[&] (const size_t taskID) { const size_t startID = (taskID+0)*numMisplacedItemsLeft/numTasks; const size_t endID = (taskID+1)*numMisplacedItemsLeft/numTasks; swapItemsInMisplacedRanges(numMisplacedRangesLeft,numMisplacedRangesRight,startID,endID); }); return mid; } }; template __noinline size_t parallel_partitioning(T* array, const size_t begin, const size_t end, const Vi &identity, V &leftReduction, V &rightReduction, const IsLeft& is_left, const Reduction_T& reduction_t, const Reduction_V& reduction_v, size_t BLOCK_SIZE = 128) { /* fall back to single threaded partitioning for small N */ if (unlikely(end-begin < BLOCK_SIZE)) return serial_partitioning(array,begin,end,leftReduction,rightReduction,is_left,reduction_t); /* otherwise use parallel code */ else { typedef parallel_partition_task partition_task; std::unique_ptr p(new partition_task(&array[begin],end-begin,identity,is_left,reduction_t,reduction_v,BLOCK_SIZE)); return begin+p->partition(leftReduction,rightReduction); } } template __noinline size_t parallel_partitioning(T* array, const size_t begin, const size_t end, const Vi &identity, V &leftReduction, V &rightReduction, const IsLeft& is_left, const Reduction_T& reduction_t, const Reduction_V& reduction_v, size_t BLOCK_SIZE, size_t PARALLEL_THRESHOLD) { /* fall back to single threaded partitioning for 
small N */ if (unlikely(end-begin < PARALLEL_THRESHOLD)) return serial_partitioning(array,begin,end,leftReduction,rightReduction,is_left,reduction_t); /* otherwise use parallel code */ else { typedef parallel_partition_task partition_task; std::unique_ptr p(new partition_task(&array[begin],end-begin,identity,is_left,reduction_t,reduction_v,BLOCK_SIZE)); return begin+p->partition(leftReduction,rightReduction); } } template inline size_t parallel_partitioning(T* array, const size_t begin, const size_t end, const IsLeft& is_left, size_t BLOCK_SIZE = 128) { size_t leftReduction = 0; size_t rightReduction = 0; return parallel_partitioning( array,begin,end,0,leftReduction,rightReduction,is_left, [] (size_t& t,const T& ref) { }, [] (size_t& t0,size_t& t1) { }, BLOCK_SIZE); } } level-zero-raytracing-support-1.2.3/rtbuild/algorithms/parallel_prefix_sum.h000066400000000000000000000054341514453371700275200ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "parallel_for.h" namespace embree { template struct ParallelPrefixSumState { enum { MAX_TASKS = 64 }; Value counts[MAX_TASKS]; Value sums [MAX_TASKS]; }; template __forceinline Value parallel_prefix_sum( ParallelPrefixSumState& state, Index first, Index last, Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction) { /* calculate number of tasks to use */ const size_t numThreads = TaskScheduler::threadCount(); const size_t numBlocks = (last-first+minStepSize-1)/minStepSize; const size_t taskCount = min(numThreads,numBlocks,size_t(ParallelPrefixSumState::MAX_TASKS)); /* perform parallel prefix sum */ parallel_for(taskCount, [&](const size_t taskIndex) { const size_t i0 = first+(taskIndex+0)*(last-first)/taskCount; const size_t i1 = first+(taskIndex+1)*(last-first)/taskCount; state.counts[taskIndex] = func(range(i0,i1),state.sums[taskIndex]); }); /* calculate prefix sum */ Value sum=identity; for (size_t i=0; i 
__forceinline Value parallel_prefix_sum(const SrcArray& src, DstArray& dst, size_t N, const Value& identity, const Add& add, const size_t SINGLE_THREAD_THRESHOLD = 4096) { /* perform single threaded prefix operation for small N */ if (N < SINGLE_THREAD_THRESHOLD) { Value sum=identity; for (size_t i=0; i state; /* initial run just sets up start values for subtasks */ parallel_prefix_sum( state, size_t(0), size_t(N), size_t(1024), identity, [&](const range& r, const Value& sum) -> Value { Value s = identity; for (size_t i=r.begin(); i& r, const Value& sum) -> Value { Value s = identity; for (size_t i=r.begin(); i __forceinline Value sequential_reduce( const Index first, const Index last, const Value& identity, const Func& func, const Reduction& reduction ) { return func(range(first,last)); } template __forceinline Value sequential_reduce( const Index first, const Index last, const Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction ) { return func(range(first,last)); } template __noinline Value parallel_reduce_internal( Index taskCount, const Index first, const Index last, const Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction ) { const Index maxTasks = 512; const Index threadCount = (Index) TaskScheduler::threadCount(); taskCount = min(taskCount,threadCount,maxTasks); /* parallel invocation of all tasks */ dynamic_large_stack_array(Value,values,taskCount,8192); // consumes at most 8192 bytes on the stack parallel_for(taskCount, [&](const Index taskIndex) { const Index k0 = first+(taskIndex+0)*(last-first)/taskCount; const Index k1 = first+(taskIndex+1)*(last-first)/taskCount; values[taskIndex] = func(range(k0,k1)); }); /* perform reduction over all tasks */ Value v = identity; for (Index i=0; i __forceinline Value parallel_reduce( const Index first, const Index last, const Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction ) { #if defined(TASKING_INTERNAL) && 
!defined(TASKING_TBB) /* fast path for small number of iterations */ Index taskCount = (last-first+minStepSize-1)/minStepSize; if (likely(taskCount == 1)) { return func(range(first,last)); } return parallel_reduce_internal(taskCount,first,last,minStepSize,identity,func,reduction); #elif defined(TASKING_TBB) #if TBB_INTERFACE_VERSION >= 12002 tbb::task_group_context context; const Value v = tbb::parallel_reduce(tbb::blocked_range(first,last,minStepSize),identity, [&](const tbb::blocked_range& r, const Value& start) { return reduction(start,func(range(r.begin(),r.end()))); }, reduction,context); if (context.is_group_execution_cancelled()) throw std::runtime_error("task cancelled"); return v; #else const Value v = tbb::parallel_reduce(tbb::blocked_range(first,last,minStepSize),identity, [&](const tbb::blocked_range& r, const Value& start) { return reduction(start,func(range(r.begin(),r.end()))); }, reduction); if (tbb::task::self().is_cancelled()) throw std::runtime_error("task cancelled"); return v; #endif #else // TASKING_PPL struct AlignedValue { char storage[__alignof(Value)+sizeof(Value)]; static uintptr_t alignUp(uintptr_t p, size_t a) { return p + (~(p - 1) % a); }; Value* getValuePtr() { return reinterpret_cast(alignUp(uintptr_t(storage), __alignof(Value))); } const Value* getValuePtr() const { return reinterpret_cast(alignUp(uintptr_t(storage), __alignof(Value))); } AlignedValue(const Value& v) { new(getValuePtr()) Value(v); } AlignedValue(const AlignedValue& v) { new(getValuePtr()) Value(*v.getValuePtr()); } AlignedValue(const AlignedValue&& v) { new(getValuePtr()) Value(*v.getValuePtr()); }; AlignedValue& operator = (const AlignedValue& v) { *getValuePtr() = *v.getValuePtr(); return *this; }; AlignedValue& operator = (const AlignedValue&& v) { *getValuePtr() = *v.getValuePtr(); return *this; }; operator Value() const { return *getValuePtr(); } }; struct Iterator_Index { Index v; typedef std::forward_iterator_tag iterator_category; typedef AlignedValue 
value_type; typedef Index difference_type; typedef Index distance_type; typedef AlignedValue* pointer; typedef AlignedValue& reference; __forceinline Iterator_Index() {} __forceinline Iterator_Index(Index v) : v(v) {} __forceinline bool operator== (Iterator_Index other) { return v == other.v; } __forceinline bool operator!= (Iterator_Index other) { return v != other.v; } __forceinline Iterator_Index operator++() { return Iterator_Index(++v); } __forceinline Iterator_Index operator++(int) { return Iterator_Index(v++); } }; auto range_reduction = [&](Iterator_Index begin, Iterator_Index end, const AlignedValue& start) { assert(begin.v < end.v); return reduction(start, func(range(begin.v, end.v))); }; const Value v = concurrency::parallel_reduce(Iterator_Index(first), Iterator_Index(last), AlignedValue(identity), range_reduction, reduction); return v; #endif } template __forceinline Value parallel_reduce( const Index first, const Index last, const Index minStepSize, const Index parallel_threshold, const Value& identity, const Func& func, const Reduction& reduction ) { if (likely(last-first < parallel_threshold)) { return func(range(first,last)); } else { return parallel_reduce(first,last,minStepSize,identity,func,reduction); } } template __forceinline Value parallel_reduce( const range range, const Index minStepSize, const Index parallel_threshold, const Value& identity, const Func& func, const Reduction& reduction ) { return parallel_reduce(range.begin(),range.end(),minStepSize,parallel_threshold,identity,func,reduction); } template __forceinline Value parallel_reduce( const Index first, const Index last, const Value& identity, const Func& func, const Reduction& reduction ) { auto funcr = [&] ( const range r ) { Value v = identity; for (Index i=r.begin(); i __forceinline Value parallel_reduce( const range range, const Value& identity, const Func& func, const Reduction& reduction ) { return parallel_reduce(range.begin(),range.end(),Index(1),identity,func,reduction); } 
} level-zero-raytracing-support-1.2.3/rtbuild/algorithms/parallel_sort.h000066400000000000000000000312711514453371700263240ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "../simd/simd.h" #include "parallel_for.h" #include namespace embree { template __forceinline void insertionsort_ascending(T *__restrict__ array, const size_t length) { for(size_t i = 1;i 0 && v < array[j-1]) { array[j] = array[j-1]; --j; } array[j] = v; } } template __forceinline void insertionsort_decending(T *__restrict__ array, const size_t length) { for(size_t i = 1;i 0 && v > array[j-1]) { array[j] = array[j-1]; --j; } array[j] = v; } } template void quicksort_ascending(T *__restrict__ t, const ssize_t begin, const ssize_t end) { if (likely(begin < end)) { const T pivotvalue = t[begin]; ssize_t left = begin - 1; ssize_t right = end + 1; while(1) { while (t[--right] > pivotvalue); while (t[++left] < pivotvalue); if (left >= right) break; const T temp = t[right]; t[right] = t[left]; t[left] = temp; } const int pivot = right; quicksort_ascending(t, begin, pivot); quicksort_ascending(t, pivot + 1, end); } } template void quicksort_decending(T *__restrict__ t, const ssize_t begin, const ssize_t end) { if (likely(begin < end)) { const T pivotvalue = t[begin]; ssize_t left = begin - 1; ssize_t right = end + 1; while(1) { while (t[--right] < pivotvalue); while (t[++left] > pivotvalue); if (left >= right) break; const T temp = t[right]; t[right] = t[left]; t[left] = temp; } const int pivot = right; quicksort_decending(t, begin, pivot); quicksort_decending(t, pivot + 1, end); } } template void quicksort_insertionsort_ascending(T *__restrict__ t, const ssize_t begin, const ssize_t end) { if (likely(begin < end)) { const ssize_t size = end-begin+1; if (likely(size <= THRESHOLD)) { insertionsort_ascending(&t[begin],size); } else { const T pivotvalue = t[begin]; ssize_t left = begin - 1; ssize_t right = end + 1; while(1) { 
while (t[--right] > pivotvalue); while (t[++left] < pivotvalue); if (left >= right) break; const T temp = t[right]; t[right] = t[left]; t[left] = temp; } const ssize_t pivot = right; quicksort_insertionsort_ascending(t, begin, pivot); quicksort_insertionsort_ascending(t, pivot + 1, end); } } } template void quicksort_insertionsort_decending(T *__restrict__ t, const ssize_t begin, const ssize_t end) { if (likely(begin < end)) { const ssize_t size = end-begin+1; if (likely(size <= THRESHOLD)) { insertionsort_decending(&t[begin],size); } else { const T pivotvalue = t[begin]; ssize_t left = begin - 1; ssize_t right = end + 1; while(1) { while (t[--right] < pivotvalue); while (t[++left] > pivotvalue); if (left >= right) break; const T temp = t[right]; t[right] = t[left]; t[left] = temp; } const ssize_t pivot = right; quicksort_insertionsort_decending(t, begin, pivot); quicksort_insertionsort_decending(t, pivot + 1, end); } } } template static void radixsort32(T* const morton, const size_t num, const unsigned int shift = 3*8) { static const unsigned int BITS = 8; static const unsigned int BUCKETS = (1 << BITS); static const unsigned int CMP_SORT_THRESHOLD = 16; __aligned(64) unsigned int count[BUCKETS]; /* clear buckets */ for (size_t i=0;i> shift) & (BUCKETS-1)]++; /* prefix sums */ __aligned(64) unsigned int head[BUCKETS]; __aligned(64) unsigned int tail[BUCKETS]; head[0] = 0; for (size_t i=1; i> shift) & (BUCKETS-1); if (b == i) break; std::swap(v,morton[head[b]++]); } assert((unsigned(v) >> shift & (BUCKETS-1)) == i); morton[head[i]++] = v; } } if (shift == 0) return; size_t offset = 0; for (size_t i=0;i> shift) & (BUCKETS-1)) == i); if (unlikely(count[i] < CMP_SORT_THRESHOLD)) insertionsort_ascending(morton + offset, count[i]); else radixsort32(morton + offset, count[i], shift-BITS); for (size_t j=offset;j class ParallelRadixSort { static const size_t MAX_TASKS = 64; static const size_t BITS = 8; static const size_t BUCKETS = (1 << BITS); typedef unsigned int 
TyRadixCount[BUCKETS]; template static bool compare(const T& v0, const T& v1) { return (Key)v0 < (Key)v1; } private: ParallelRadixSort (const ParallelRadixSort& other) DELETED; // do not implement ParallelRadixSort& operator= (const ParallelRadixSort& other) DELETED; // do not implement public: ParallelRadixSort (Ty* const src, Ty* const tmp, const size_t N) : radixCount(nullptr), src(src), tmp(tmp), N(N) {} void sort(const size_t blockSize) { assert(blockSize > 0); /* perform single threaded sort for small N */ if (N<=blockSize) // handles also special case of 0! { /* do inplace sort inside destination array */ std::sort(src,src+N,compare); } /* perform parallel sort for large N */ else { const size_t numThreads = min((N+blockSize-1)/blockSize,TaskScheduler::threadCount(),size_t(MAX_TASKS)); tbbRadixSort(numThreads); } } ~ParallelRadixSort() { alignedFree(radixCount); radixCount = nullptr; } private: void tbbRadixIteration0(const Key shift, const Ty* __restrict const src, Ty* __restrict const dst, const size_t threadIndex, const size_t threadCount) { const size_t startID = (threadIndex+0)*N/threadCount; const size_t endID = (threadIndex+1)*N/threadCount; /* mask to extract some number of bits */ const Key mask = BUCKETS-1; /* count how many items go into the buckets */ for (size_t i=0; i> (size_t)shift) & (size_t)mask; #else const Key index = ((Key)src[i] >> shift) & mask; #endif count[index]++; } } void tbbRadixIteration1(const Key shift, const Ty* __restrict const src, Ty* __restrict const dst, const size_t threadIndex, const size_t threadCount) { const size_t startID = (threadIndex+0)*N/threadCount; const size_t endID = (threadIndex+1)*N/threadCount; /* mask to extract some number of bits */ const Key mask = BUCKETS-1; /* calculate total number of items for each bucket */ __aligned(64) unsigned int total[BUCKETS]; /* for (size_t i=0; i> (size_t)shift) & (size_t)mask; #else const size_t index = ((Key)src[i] >> shift) & mask; #endif dst[offset[index]++] = elt; } 
} void tbbRadixIteration(const Key shift, const bool last, const Ty* __restrict src, Ty* __restrict dst, const size_t numTasks) { affinity_partitioner ap; parallel_for_affinity(numTasks,[&] (size_t taskIndex) { tbbRadixIteration0(shift,src,dst,taskIndex,numTasks); },ap); parallel_for_affinity(numTasks,[&] (size_t taskIndex) { tbbRadixIteration1(shift,src,dst,taskIndex,numTasks); },ap); } void tbbRadixSort(const size_t numTasks) { radixCount = (TyRadixCount*) alignedMalloc(MAX_TASKS*sizeof(TyRadixCount),64); if (sizeof(Key) == sizeof(uint32_t)) { tbbRadixIteration(0*BITS,0,src,tmp,numTasks); tbbRadixIteration(1*BITS,0,tmp,src,numTasks); tbbRadixIteration(2*BITS,0,src,tmp,numTasks); tbbRadixIteration(3*BITS,1,tmp,src,numTasks); } else if (sizeof(Key) == sizeof(uint64_t)) { tbbRadixIteration(0*BITS,0,src,tmp,numTasks); tbbRadixIteration(1*BITS,0,tmp,src,numTasks); tbbRadixIteration(2*BITS,0,src,tmp,numTasks); tbbRadixIteration(3*BITS,0,tmp,src,numTasks); tbbRadixIteration(4*BITS,0,src,tmp,numTasks); tbbRadixIteration(5*BITS,0,tmp,src,numTasks); tbbRadixIteration(6*BITS,0,src,tmp,numTasks); tbbRadixIteration(7*BITS,1,tmp,src,numTasks); } } private: TyRadixCount* radixCount; Ty* const src; Ty* const tmp; const size_t N; }; template void radix_sort(Ty* const src, Ty* const tmp, const size_t N, const size_t blockSize = 8192) { ParallelRadixSort(src,tmp,N).sort(blockSize); } template void radix_sort(Ty* const src, Ty* const tmp, const size_t N, const size_t blockSize = 8192) { ParallelRadixSort(src,tmp,N).sort(blockSize); } template void radix_sort_u32(Ty* const src, Ty* const tmp, const size_t N, const size_t blockSize = 8192) { radix_sort(src,tmp,N,blockSize); } template void radix_sort_u64(Ty* const src, Ty* const tmp, const size_t N, const size_t blockSize = 8192) { radix_sort(src,tmp,N,blockSize); } } 
level-zero-raytracing-support-1.2.3/rtbuild/builders/000077500000000000000000000000001514453371700227445ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/rtbuild/builders/heuristic_binning.h000066400000000000000000000550501514453371700266250ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "priminfo.h" #include "../algorithms/parallel_reduce.h" #include "../algorithms/parallel_partition.h" namespace embree { namespace isa { /*! mapping into bins */ template struct BinMapping { public: __forceinline BinMapping() {} /*! calculates the mapping */ __forceinline BinMapping(size_t N, const BBox3fa& centBounds) { num = min(BINS,size_t(4.0f + 0.05f*N)); assert(num >= 1); const vfloat4 eps = 1E-34f; const vfloat4 diag = max(eps, (vfloat4) centBounds.size()); scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f)); ofs = (vfloat4) centBounds.lower; } /*! calculates the mapping */ __forceinline BinMapping(const BBox3fa& centBounds) { num = BINS; const vfloat4 eps = 1E-34f; const vfloat4 diag = max(eps, (vfloat4) centBounds.size()); scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f)); ofs = (vfloat4) centBounds.lower; } /*! calculates the mapping */ template __forceinline BinMapping(const PrimInfo& pinfo) { const vfloat4 eps = 1E-34f; num = min(BINS,size_t(4.0f + 0.05f*pinfo.size())); const vfloat4 diag = max(eps,(vfloat4) pinfo.centBounds.size()); scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f)); ofs = (vfloat4) pinfo.centBounds.lower; } /*! returns number of bins */ __forceinline size_t size() const { return num; } /*! 
slower but safe binning */ __forceinline Vec3ia bin(const Vec3fa& p) const { const vint4 i = floori((vfloat4(p)-ofs)*scale); assert(i[0] >= 0 && (size_t)i[0] < num); assert(i[1] >= 0 && (size_t)i[1] < num); assert(i[2] >= 0 && (size_t)i[2] < num); // we clamp to handle corner cases that could calculate out of bounds bin return Vec3ia(clamp(i,vint4(0),vint4(num-1))); } /*! faster but unsafe binning */ __forceinline Vec3ia bin_unsafe(const Vec3fa& p) const { return Vec3ia(floori((vfloat4(p)-ofs)*scale)); } /*! faster but unsafe binning */ template __forceinline Vec3ia bin_unsafe(const PrimRef& p) const { return bin_unsafe(p.binCenter()); } /*! faster but unsafe binning */ template __forceinline Vec3ia bin_unsafe(const PrimRef& p, const BinBoundsAndCenter& binBoundsAndCenter) const { return bin_unsafe(binBoundsAndCenter.binCenter(p)); } template __forceinline bool bin_unsafe(const PrimRef& ref, const vint4& vSplitPos, const vbool4& splitDimMask) const // FIXME: rename to isLeft { return any(((vint4)bin_unsafe(center2(ref.bounds())) < vSplitPos) & splitDimMask); } /*! calculates left spatial position of bin */ __forceinline float pos(const size_t bin, const size_t dim) const { return madd(float(bin),1.0f / scale[dim],ofs[dim]); } /*! returns true if the mapping is invalid in some dimension */ __forceinline bool invalid(const size_t dim) const { return scale[dim] == 0.0f; } /*! stream output */ friend embree_ostream operator<<(embree_ostream cout, const BinMapping& mapping) { return cout << "BinMapping { num = " << mapping.num << ", ofs = " << mapping.ofs << ", scale = " << mapping.scale << "}"; } public: size_t num; vfloat4 ofs,scale; //!< linear function that maps to bin ID }; /*! stores all information to perform some split */ template struct BinSplit { enum { SPLIT_OBJECT = 0, SPLIT_FALLBACK = 1, SPLIT_ENFORCE = 2, // splits with larger ID are enforced in createLargeLeaf even if we could create a leaf already SPLIT_TEMPORAL = 2, SPLIT_GEOMID = 3, }; /*! 
construct an invalid split by default */ __forceinline BinSplit() : sah(inf), dim(-1), pos(0), data(0) {} __forceinline BinSplit(float sah, unsigned data, int dim = 0, float fpos = 0) : sah(sah), dim(dim), fpos(fpos), data(data) {} /*! constructs specified split */ __forceinline BinSplit(float sah, int dim, int pos, const BinMapping& mapping) : sah(sah), dim(dim), pos(pos), data(0), mapping(mapping) {} /*! tests if this split is valid */ __forceinline bool valid() const { return dim != -1; } /*! calculates surface area heuristic for performing the split */ __forceinline float splitSAH() const { return sah; } /*! stream output */ friend embree_ostream operator<<(embree_ostream cout, const BinSplit& split) { return cout << "BinSplit { sah = " << split.sah << ", dim = " << split.dim << ", pos = " << split.pos << "}"; } public: float sah; //!< SAH cost of the split int dim; //!< split dimension union { int pos; float fpos; }; //!< bin index for splitting unsigned int data; //!< extra optional split data BinMapping mapping; //!< mapping into bins }; /*! stores extended information about the split */ template struct SplitInfoT { __forceinline SplitInfoT () {} __forceinline SplitInfoT (size_t leftCount, const BBox& leftBounds, size_t rightCount, const BBox& rightBounds) : leftCount(leftCount), rightCount(rightCount), leftBounds(leftBounds), rightBounds(rightBounds) {} public: size_t leftCount,rightCount; BBox leftBounds,rightBounds; }; typedef SplitInfoT SplitInfo; /*! stores all binning information */ template struct __aligned(64) BinInfoT { typedef BinSplit Split; typedef vbool4 vbool; typedef vint4 vint; typedef vfloat4 vfloat; __forceinline BinInfoT() { } __forceinline BinInfoT(EmptyTy) { clear(); } /*! 
bin access function */ __forceinline BBox &bounds(const size_t binID, const size_t dimID) { return _bounds[binID][dimID]; } __forceinline const BBox &bounds(const size_t binID, const size_t dimID) const { return _bounds[binID][dimID]; } __forceinline unsigned int &counts(const size_t binID, const size_t dimID) { return _counts[binID][dimID]; } __forceinline const unsigned int &counts(const size_t binID, const size_t dimID) const { return _counts[binID][dimID]; } __forceinline vuint4 &counts(const size_t binID) { return _counts[binID]; } __forceinline const vuint4 &counts(const size_t binID) const { return _counts[binID]; } /*! clears the bin info */ __forceinline void clear() { for (size_t i=0; i& mapping) { if (unlikely(N == 0)) return; size_t i; for (i=0; i(bin0); bounds(b00,0).extend(prim0); const unsigned int b01 = extract<1>(bin0); bounds(b01,1).extend(prim0); const unsigned int b02 = extract<2>(bin0); bounds(b02,2).extend(prim0); const unsigned int s0 = (unsigned int)prims[i+0].size(); counts(b00,0)+=s0; counts(b01,1)+=s0; counts(b02,2)+=s0; /*! increase bounds of bins for odd primitive */ const unsigned int b10 = extract<0>(bin1); bounds(b10,0).extend(prim1); const unsigned int b11 = extract<1>(bin1); bounds(b11,1).extend(prim1); const unsigned int b12 = extract<2>(bin1); bounds(b12,2).extend(prim1); const unsigned int s1 = (unsigned int)prims[i+1].size(); counts(b10,0)+=s1; counts(b11,1)+=s1; counts(b12,2)+=s1; } /*! for uneven number of primitives */ if (i < N) { /*! map primitive to bin */ BBox prim0; Vec3fa center0; prims[i].binBoundsAndCenter(prim0,center0); const vint4 bin0 = (vint4)mapping.bin(center0); /*! increase bounds of bins */ const unsigned int s0 = (unsigned int)prims[i].size(); const int b00 = extract<0>(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0); const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0); const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0); } } /*! 
bins an array of primitives */ template __forceinline void bin (const PrimRef* prims, size_t N, const BinMapping& mapping, const BinBoundsAndCenter& binBoundsAndCenter) { if (N == 0) return; size_t i; for (i=0; i(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0); const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0); const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0); /*! increase bounds of bins for odd primitive */ const unsigned int s1 = prims[i+1].size(); const int b10 = extract<0>(bin1); counts(b10,0)+=s1; bounds(b10,0).extend(prim1); const int b11 = extract<1>(bin1); counts(b11,1)+=s1; bounds(b11,1).extend(prim1); const int b12 = extract<2>(bin1); counts(b12,2)+=s1; bounds(b12,2).extend(prim1); } /*! for uneven number of primitives */ if (i < N) { /*! map primitive to bin */ BBox prim0; Vec3fa center0; binBoundsAndCenter.binBoundsAndCenter(prims[i+0],prim0,center0); const vint4 bin0 = (vint4)mapping.bin(center0); /*! increase bounds of bins */ const unsigned int s0 = prims[i+0].size(); const int b00 = extract<0>(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0); const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0); const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0); } } __forceinline void bin(const PrimRef* prims, size_t begin, size_t end, const BinMapping& mapping) { bin(prims+begin,end-begin,mapping); } template __forceinline void bin(const PrimRef* prims, size_t begin, size_t end, const BinMapping& mapping, const BinBoundsAndCenter& binBoundsAndCenter) { bin(prims+begin,end-begin,mapping,binBoundsAndCenter); } /*! 
merges in other binning information */ __forceinline void merge (const BinInfoT& other, size_t numBins) { for (size_t i=0; i& mapping, const size_t blocks_shift) const { /* sweep from right to left and compute parallel prefix of merged bounds */ vfloat4 rAreas[BINS]; vuint4 rCounts[BINS]; vuint4 count = 0; BBox bx = empty; BBox by = empty; BBox bz = empty; for (size_t i=mapping.size()-1; i>0; i--) { count += counts(i); rCounts[i] = count; bx.extend(bounds(i,0)); rAreas[i][0] = expectedApproxHalfArea(bx); by.extend(bounds(i,1)); rAreas[i][1] = expectedApproxHalfArea(by); bz.extend(bounds(i,2)); rAreas[i][2] = expectedApproxHalfArea(bz); rAreas[i][3] = 0.0f; } /* sweep from left to right and compute SAH */ vuint4 blocks_add = (1 << blocks_shift)-1; vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0; count = 0; bx = empty; by = empty; bz = empty; for (size_t i=1; i> (unsigned int)(blocks_shift); // if blocks_shift >=1 then lCount < 4B and could be represented with an vint4, which would allow for faster vfloat4 conversions. const vuint4 rCount = (rCounts[i]+blocks_add) >> (unsigned int)(blocks_shift); const vfloat4 sah = madd(lArea,vfloat4(lCount),rArea*vfloat4(rCount)); //const vfloat4 sah = madd(lArea,vfloat4(vint4(lCount)),rArea*vfloat4(vint4(rCount))); vbestPos = select(sah < vbestSAH,ii ,vbestPos); vbestSAH = select(sah < vbestSAH,sah,vbestSAH); } /* find best dimension */ float bestSAH = inf; int bestDim = -1; int bestPos = 0; for (int dim=0; dim<3; dim++) { /* ignore zero sized dimensions */ if (unlikely(mapping.invalid(dim))) continue; /* test if this is a better dimension */ if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) { bestDim = dim; bestPos = vbestPos[dim]; bestSAH = vbestSAH[dim]; } } return Split(bestSAH,bestDim,bestPos,mapping); } /*! 
finds the best split by scanning binning information */ __forceinline Split best_block_size(const BinMapping& mapping, const size_t blockSize) const { /* sweep from right to left and compute parallel prefix of merged bounds */ vfloat4 rAreas[BINS]; vuint4 rCounts[BINS]; vuint4 count = 0; BBox bx = empty; BBox by = empty; BBox bz = empty; for (size_t i=mapping.size()-1; i>0; i--) { count += counts(i); rCounts[i] = count; bx.extend(bounds(i,0)); rAreas[i][0] = expectedApproxHalfArea(bx); by.extend(bounds(i,1)); rAreas[i][1] = expectedApproxHalfArea(by); bz.extend(bounds(i,2)); rAreas[i][2] = expectedApproxHalfArea(bz); rAreas[i][3] = 0.0f; } /* sweep from left to right and compute SAH */ vuint4 blocks_add = blockSize-1; vfloat4 blocks_factor = 1.0f/float(blockSize); vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0; count = 0; bx = empty; by = empty; bz = empty; for (size_t i=1; i& mapping, const Split& split, SplitInfoT& info) const { if (split.dim == -1) { new (&info) SplitInfoT(0,empty,0,empty); return; } size_t leftCount = 0; BBox leftBounds = empty; for (size_t i=0; i<(size_t)split.pos; i++) { leftCount += counts(i,split.dim); leftBounds.extend(bounds(i,split.dim)); } size_t rightCount = 0; BBox rightBounds = empty; for (size_t i=split.pos; i(leftCount,leftBounds,rightCount,rightBounds); } /*! gets the number of primitives left of the split */ __forceinline size_t getLeftCount(const BinMapping& mapping, const Split& split) const { if (unlikely(split.dim == -1)) return -1; size_t leftCount = 0; for (size_t i = 0; i < (size_t)split.pos; i++) { leftCount += counts(i, split.dim); } return leftCount; } /*! 
gets the number of primitives right of the split */ __forceinline size_t getRightCount(const BinMapping& mapping, const Split& split) const { if (unlikely(split.dim == -1)) return -1; size_t rightCount = 0; for (size_t i = (size_t)split.pos; i __forceinline void bin_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, const BinMapping& mapping) { if (likely(end-begin < parallelThreshold)) { binner.bin(prims,begin,end,mapping); } else { binner = parallel_reduce(begin,end,blockSize,binner, [&](const range& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping); return binner; }, [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; }); } } template __forceinline void bin_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, const BinMapping& mapping, const BinBoundsAndCenter& binBoundsAndCenter) { if (likely(end-begin < parallelThreshold)) { binner.bin(prims,begin,end,mapping,binBoundsAndCenter); } else { binner = parallel_reduce(begin,end,blockSize,binner, [&](const range& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping, binBoundsAndCenter); return binner; }, [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; }); } } template __forceinline void bin_serial_or_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, const BinMapping& mapping) { if (!parallel) { binner.bin(prims,begin,end,mapping); } else { binner = parallel_reduce(begin,end,blockSize,binner, [&](const range& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping); return binner; }, [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; }); } } 
template __forceinline void bin_serial_or_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, const BinMapping& mapping, const BinBoundsAndCenter& binBoundsAndCenter) { if (!parallel) { binner.bin(prims,begin,end,mapping,binBoundsAndCenter); } else { binner = parallel_reduce(begin,end,blockSize,binner, [&](const range& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping, binBoundsAndCenter); return binner; }, [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; }); } } } level-zero-raytracing-support-1.2.3/rtbuild/builders/heuristic_binning_array_aligned.h000066400000000000000000000201441514453371700315020ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "heuristic_binning.h" namespace embree { namespace isa { struct PrimInfoRange : public CentGeomBBox3fa, public range { __forceinline PrimInfoRange () { } __forceinline PrimInfoRange(const PrimInfo& pinfo) : CentGeomBBox3fa(pinfo), range(pinfo.begin,pinfo.end) {} __forceinline PrimInfoRange(EmptyTy) : CentGeomBBox3fa(EmptyTy()), range(0,0) {} __forceinline PrimInfoRange (size_t begin, size_t end, const CentGeomBBox3fa& centGeomBounds) : CentGeomBBox3fa(centGeomBounds), range(begin,end) {} __forceinline PrimInfoRange (range r, const CentGeomBBox3fa& centGeomBounds) : CentGeomBBox3fa(centGeomBounds), range(r) {} __forceinline float leafSAH() const { return expectedApproxHalfArea(geomBounds)*float(size()); } __forceinline float leafSAH(size_t block_shift) const { return expectedApproxHalfArea(geomBounds)*float((size()+(size_t(1)<> block_shift); } __forceinline range get_range() const { return range(begin(),end()); } template __forceinline void add_primref(const PrimRef& prim) { CentGeomBBox3fa::extend_primref(prim); _end++; } }; inline void performFallbackSplit(PrimRef* const prims, const PrimInfoRange& 
pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo) { const size_t begin = pinfo.begin(); const size_t end = pinfo.end(); const size_t center = (begin + end)/2; CentGeomBBox3fa left(empty); for (size_t i=begin; i inline void performTypeSplit(const getTypeFunc& getType, Type type, PrimRef* const prims, range range, PrimInfoRange& linfo, PrimInfoRange& rinfo) { CentGeomBBox3fa local_left(empty), local_right(empty); auto isLeft = [&] (const PrimRef& ref) { return type == getType(ref.geomID()); }; const size_t center = serial_partitioning(prims,range.begin(),range.end(),local_left,local_right,isLeft,CentGeomBBox3fa::extend_ref); linfo = PrimInfoRange(make_range(range.begin(),center ),local_left); rinfo = PrimInfoRange(make_range(center ,range.end()),local_right); } /*! Performs standard object binning */ template struct HeuristicArrayBinningSAH { typedef BinSplit Split; typedef BinInfoT Binner; typedef range Set; static const size_t PARALLEL_THRESHOLD = 3 * 1024; static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024; static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128; __forceinline HeuristicArrayBinningSAH () : prims(nullptr) {} /*! remember prim array */ __forceinline HeuristicArrayBinningSAH (PrimRef* prims) : prims(prims) {} /*! finds the best split */ __noinline const Split find(const PrimInfoRange& pinfo, const size_t logBlockSize) { if (likely(pinfo.size() < PARALLEL_THRESHOLD)) return find_template(pinfo,logBlockSize); else return find_template(pinfo,logBlockSize); } template __forceinline const Split find_template(const PrimInfoRange& pinfo, const size_t logBlockSize) { Binner binner(empty); const BinMapping mapping(pinfo); bin_serial_or_parallel(binner,prims,pinfo.begin(),pinfo.end(),PARALLEL_FIND_BLOCK_SIZE,mapping); return binner.best(mapping,logBlockSize); } /*! 
finds the best split */ __noinline const Split find_block_size(const PrimInfoRange& pinfo, const size_t blockSize) { if (likely(pinfo.size() < PARALLEL_THRESHOLD)) return find_block_size_template(pinfo,blockSize); else return find_block_size_template(pinfo,blockSize); } template __forceinline const Split find_block_size_template(const PrimInfoRange& pinfo, const size_t blockSize) { Binner binner(empty); const BinMapping mapping(pinfo); bin_serial_or_parallel(binner,prims,pinfo.begin(),pinfo.end(),PARALLEL_FIND_BLOCK_SIZE,mapping); return binner.best_block_size(mapping,blockSize); } /*! array partitioning */ __forceinline void split(const Split& split, const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo) { if (likely(pinfo.size() < PARALLEL_THRESHOLD)) split_template(split,pinfo,linfo,rinfo); else split_template(split,pinfo,linfo,rinfo); } template __forceinline void split_template(const Split& split, const PrimInfoRange& set, PrimInfoRange& lset, PrimInfoRange& rset) { if (!split.valid()) { deterministic_order(set); return splitFallback(set,lset,rset); } const size_t begin = set.begin(); const size_t end = set.end(); CentGeomBBox3fa local_left(empty); CentGeomBBox3fa local_right(empty); const unsigned int splitPos = split.pos; const unsigned int splitDim = split.dim; const unsigned int splitDimMask = (unsigned int)1 << splitDim; const typename Binner::vint vSplitPos(splitPos); const typename Binner::vbool vSplitMask(splitDimMask); auto isLeft = [&] (const PrimRef &ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); }; size_t center = 0; if (!parallel) center = serial_partitioning(prims,begin,end,local_left,local_right,isLeft, [] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); }); else center = parallel_partitioning( prims,begin,end,EmptyTy(),local_left,local_right,isLeft, [] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); }, [] (CentGeomBBox3fa& pinfo0,const CentGeomBBox3fa& pinfo1) 
{ pinfo0.merge(pinfo1); }, PARALLEL_PARTITION_BLOCK_SIZE); new (&lset) PrimInfoRange(begin,center,local_left); new (&rset) PrimInfoRange(center,end,local_right); assert(area(lset.geomBounds) >= 0.0f); assert(area(rset.geomBounds) >= 0.0f); } void deterministic_order(const PrimInfoRange& pinfo) { /* required as parallel partition destroys original primitive order */ std::sort(&prims[pinfo.begin()],&prims[pinfo.end()]); } void splitFallback(const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo) { performFallbackSplit(prims,pinfo,linfo,rinfo); } void splitByGeometry(const range& range, PrimInfoRange& linfo, PrimInfoRange& rinfo) { assert(range.size() > 1); CentGeomBBox3fa left(empty); CentGeomBBox3fa right(empty); unsigned int geomID = prims[range.begin()].geomID(); size_t center = serial_partitioning(prims,range.begin(),range.end(),left,right, [&] ( const PrimRef& prim ) { return prim.geomID() == geomID; }, [ ] ( CentGeomBBox3fa& a, const PrimRef& ref ) { a.extend_center2(ref); }); new (&linfo) PrimInfoRange(range.begin(),center,left); new (&rinfo) PrimInfoRange(center,range.end(),right); } private: PrimRef* const prims; }; } } level-zero-raytracing-support-1.2.3/rtbuild/builders/heuristic_spatial.h000066400000000000000000000325321514453371700266360ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "priminfo.h" namespace embree { static const unsigned int RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS = 5; namespace isa { /*! mapping into bins */ template struct SpatialBinMapping { public: __forceinline SpatialBinMapping() {} /*! 
calculates the mapping */ __forceinline SpatialBinMapping(const CentGeomBBox3fa& pinfo) { const vfloat4 lower = (vfloat4) pinfo.geomBounds.lower; const vfloat4 upper = (vfloat4) pinfo.geomBounds.upper; const vfloat4 eps = 128.0f*vfloat4(ulp)*max(abs(lower),abs(upper)); const vfloat4 diag = max(eps,(vfloat4) pinfo.geomBounds.size()); scale = select(upper-lower <= eps,vfloat4(0.0f),vfloat4(BINS)/diag); ofs = (vfloat4) pinfo.geomBounds.lower; inv_scale = 1.0f / scale; } /*! slower but safe binning */ __forceinline vint4 bin(const Vec3fa& p) const { const vint4 i = floori((vfloat4(p)-ofs)*scale); return clamp(i,vint4(0),vint4(BINS-1)); } __forceinline std::pair bin(const BBox3fa& b) const { const vint4 lower = floori((vfloat4(b.lower)-ofs)*scale); const vint4 upper = floori((vfloat4(b.upper)-ofs)*scale); const vint4 c_lower = clamp(lower,vint4(0),vint4(BINS-1)); const vint4 c_upper = clamp(upper,vint4(0),vint4(BINS-1)); return std::pair(c_lower,c_upper); } /*! calculates left spatial position of bin */ __forceinline float pos(const size_t bin, const size_t dim) const { return madd(float(bin),inv_scale[dim],ofs[dim]); } /*! calculates left spatial position of bin */ template __forceinline vfloat posN(const vfloat bin, const size_t dim) const { return madd(bin,vfloat(inv_scale[dim]),vfloat(ofs[dim])); } /*! returns true if the mapping is invalid in some dimension */ __forceinline bool invalid(const size_t dim) const { return scale[dim] == 0.0f; } public: vfloat4 ofs,scale,inv_scale; //!< linear function that maps to bin ID }; /*! stores all information required to perform some split */ template struct SpatialBinSplit { /*! construct an invalid split by default */ __forceinline SpatialBinSplit() : sah(inf), dim(-1), pos(0), left(-1), right(-1), factor(1.0f) {} /*! 
constructs specified split */ __forceinline SpatialBinSplit(float sah, int dim, int pos, const SpatialBinMapping& mapping) : sah(sah), dim(dim), pos(pos), left(-1), right(-1), factor(1.0f), mapping(mapping) {} /*! constructs specified split */ __forceinline SpatialBinSplit(float sah, int dim, int pos, int left, int right, float factor, const SpatialBinMapping& mapping) : sah(sah), dim(dim), pos(pos), left(left), right(right), factor(factor), mapping(mapping) {} /*! tests if this split is valid */ __forceinline bool valid() const { return dim != -1; } /*! calculates surface area heuristic for performing the split */ __forceinline float splitSAH() const { return sah; } /*! stream output */ friend embree_ostream operator<<(embree_ostream cout, const SpatialBinSplit& split) { return cout << "SpatialBinSplit { sah = " << split.sah << ", dim = " << split.dim << ", pos = " << split.pos << ", left = " << split.left << ", right = " << split.right << ", factor = " << split.factor << "}"; } public: float sah; //!< SAH cost of the split int dim; //!< split dimension int pos; //!< split position int left; //!< number of elements on the left side int right; //!< number of elements on the right side float factor; //!< factor splitting the extended range SpatialBinMapping mapping; //!< mapping into bins }; /*! stores all binning information */ template struct __aligned(64) SpatialBinInfo { SpatialBinInfo() { } __forceinline SpatialBinInfo(EmptyTy) { clear(); } /*! 
clears the bin info */ __forceinline void clear() { for (size_t i=0; i __forceinline void bin2(const PrimitiveSplitterFactory& splitterFactory, const PrimRef* source, size_t begin, size_t end, const SpatialBinMapping& mapping) { for (size_t i=begin; i> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS); if (unlikely(splits <= 1)) { const vint4 bin = mapping.bin(center(prim.bounds())); for (size_t dim=0; dim<3; dim++) { assert(bin[dim] >= (int)0 && bin[dim] < (int)BINS); add(dim,bin[dim],bin[dim],bin[dim],prim.bounds()); } } else { const vint4 bin0 = mapping.bin(prim.bounds().lower); const vint4 bin1 = mapping.bin(prim.bounds().upper); for (size_t dim=0; dim<3; dim++) { if (unlikely(mapping.invalid(dim))) continue; size_t bin; size_t l = bin0[dim]; size_t r = bin1[dim]; // same bin optimization if (likely(l == r)) { add(dim,l,l,l,prim.bounds()); continue; } size_t bin_start = bin0[dim]; size_t bin_end = bin1[dim]; BBox3fa rest = prim.bounds(); /* assure that split position always overlaps the primitive bounds */ while (bin_start < bin_end && mapping.pos(bin_start+1,dim) <= rest.lower[dim]) bin_start++; while (bin_start < bin_end && mapping.pos(bin_end ,dim) >= rest.upper[dim]) bin_end--; const auto splitter = splitterFactory(prim); for (bin=bin_start; bin& mapping) { for (size_t i=begin; i best(const SpatialBinMapping& mapping, const size_t blocks_shift) const { /* sweep from right to left and compute parallel prefix of merged bounds */ vfloat4 rAreas[BINS]; vuint4 rCounts[BINS]; vuint4 count = 0; BBox3fa bx = empty; BBox3fa by = empty; BBox3fa bz = empty; for (size_t i=BINS-1; i>0; i--) { count += numEnd[i]; rCounts[i] = count; bx.extend(bounds[i][0]); rAreas[i][0] = halfArea(bx); by.extend(bounds[i][1]); rAreas[i][1] = halfArea(by); bz.extend(bounds[i][2]); rAreas[i][2] = halfArea(bz); rAreas[i][3] = 0.0f; } /* sweep from left to right and compute SAH */ vuint4 blocks_add = (1 << blocks_shift)-1; vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0; vuint4 
vbestlCount = 0; vuint4 vbestrCount = 0; count = 0; bx = empty; by = empty; bz = empty; for (size_t i=1; i> (unsigned int)(blocks_shift); const vuint4 rCount = (rCounts[i]+blocks_add) >> (unsigned int)(blocks_shift); const vfloat4 sah = madd(lArea,vfloat4(lCount),rArea*vfloat4(rCount)); // const vfloat4 sah = madd(lArea,vfloat4(vint4(lCount)),rArea*vfloat4(vint4(rCount))); const vbool4 mask = sah < vbestSAH; vbestPos = select(mask,ii ,vbestPos); vbestSAH = select(mask,sah,vbestSAH); vbestlCount = select(mask,count,vbestlCount); vbestrCount = select(mask,rCounts[i],vbestrCount); } /* find best dimension */ float bestSAH = inf; int bestDim = -1; int bestPos = 0; unsigned int bestlCount = 0; unsigned int bestrCount = 0; for (int dim=0; dim<3; dim++) { /* ignore zero sized dimensions */ if (unlikely(mapping.invalid(dim))) continue; /* test if this is a better dimension */ if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) { bestDim = dim; bestPos = vbestPos[dim]; bestSAH = vbestSAH[dim]; bestlCount = vbestlCount[dim]; bestrCount = vbestrCount[dim]; } } assert(bestSAH >= 0.0f); /* return invalid split if no split found */ if (bestDim == -1) return SpatialBinSplit(inf,-1,0,mapping); /* return best found split */ return SpatialBinSplit(bestSAH,bestDim,bestPos,bestlCount,bestrCount,1.0f,mapping); } private: BBox3fa bounds[BINS][3]; //!< geometry bounds for each bin in each dimension vuint4 numBegin[BINS]; //!< number of primitives starting in bin vuint4 numEnd[BINS]; //!< number of primitives ending in bin }; } } level-zero-raytracing-support-1.2.3/rtbuild/builders/priminfo.h000066400000000000000000000114401514453371700247400ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "primref.h" namespace embree { // FIXME: maybe there's a better place for this util fct __forceinline float areaProjectedTriangle(const Vec3fa& v0, const Vec3fa& v1, const Vec3fa& v2) { const Vec3fa e0 = v1-v0; const Vec3fa 
e1 = v2-v0; const Vec3fa d = cross(e0,e1); return fabs(d.x) + fabs(d.y) + fabs(d.z); } //namespace isa //{ template class CentGeom { public: __forceinline CentGeom () {} __forceinline CentGeom (EmptyTy) : geomBounds(empty), centBounds(empty) {} __forceinline CentGeom (const BBox& geomBounds, const BBox3fa& centBounds) : geomBounds(geomBounds), centBounds(centBounds) {} template __forceinline void extend_primref(const PrimRef& prim) { BBox bounds; Vec3fa center; prim.binBoundsAndCenter(bounds,center); geomBounds.extend(bounds); centBounds.extend(center); } static void extend_ref (CentGeom& pinfo, const PrimRef& ref) { pinfo.extend_primref(ref); }; template __forceinline void extend_center2(const PrimRef& prim) { BBox3fa bounds = prim.bounds(); geomBounds.extend(bounds); centBounds.extend(bounds.center2()); } __forceinline void extend(const BBox& geomBounds_) { geomBounds.extend(geomBounds_); centBounds.extend(center2(geomBounds_)); } __forceinline void merge(const CentGeom& other) { geomBounds.extend(other.geomBounds); centBounds.extend(other.centBounds); } static __forceinline const CentGeom merge2(const CentGeom& a, const CentGeom& b) { CentGeom r = a; r.merge(b); return r; } public: BBox geomBounds; //!< geometry bounds of primitives BBox3fa centBounds; //!< centroid bounds of primitives }; typedef CentGeom CentGeomBBox3fa; /*! 
stores bounding information for a set of primitives */ template class PrimInfoT : public CentGeom { public: using CentGeom::geomBounds; using CentGeom::centBounds; __forceinline PrimInfoT () {} __forceinline PrimInfoT (EmptyTy) : CentGeom(empty), begin(0), end(0) {} __forceinline PrimInfoT (size_t N) : CentGeom(empty), begin(0), end(N) {} __forceinline PrimInfoT (size_t begin, size_t end, const CentGeomBBox3fa& centGeomBounds) : CentGeom(centGeomBounds), begin(begin), end(end) {} template __forceinline void add_primref(const PrimRef& prim) { CentGeom::extend_primref(prim); end++; } template __forceinline void add_center2(const PrimRef& prim) { CentGeom::extend_center2(prim); end++; } template __forceinline void add_center2(const PrimRef& prim, const size_t i) { CentGeom::extend_center2(prim); end+=i; } /*__forceinline void add(const BBox& geomBounds_) { CentGeom::extend(geomBounds_); end++; } __forceinline void add(const BBox& geomBounds_, const size_t i) { CentGeom::extend(geomBounds_); end+=i; }*/ __forceinline void merge(const PrimInfoT& other) { CentGeom::merge(other); begin += other.begin; end += other.end; } static __forceinline const PrimInfoT merge(const PrimInfoT& a, const PrimInfoT& b) { PrimInfoT r = a; r.merge(b); return r; } /*! returns the number of primitives */ __forceinline size_t size() const { return end-begin; } __forceinline float halfArea() { return expectedApproxHalfArea(geomBounds); } __forceinline float leafSAH() const { return expectedApproxHalfArea(geomBounds)*float(size()); //return halfArea(geomBounds)*blocks(num); } __forceinline float leafSAH(size_t block_shift) const { return expectedApproxHalfArea(geomBounds)*float((size()+(size_t(1)<> block_shift); //return halfArea(geomBounds)*float((num+3) >> 2); //return halfArea(geomBounds)*blocks(num); } /*! 
stream output */ friend embree_ostream operator<<(embree_ostream cout, const PrimInfoT& pinfo) { return cout << "PrimInfo { begin = " << pinfo.begin << ", end = " << pinfo.end << ", geomBounds = " << pinfo.geomBounds << ", centBounds = " << pinfo.centBounds << "}"; } public: size_t begin,end; //!< number of primitives }; typedef PrimInfoT PrimInfo; //typedef PrimInfoT PrimInfoMB; //} } level-zero-raytracing-support-1.2.3/rtbuild/builders/primref.h000066400000000000000000000056521514453371700245710ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "../sys/platform.h" #include "../sys/sysinfo.h" #include "../math/emath.h" #include "../simd/simd.h" #include "../math/vec2.h" #include "../math/vec3.h" #include "../math/vec3fa.h" #include "../math/bbox.h" #include "../math/affinespace.h" #include "../math/range.h" namespace embree { /*! A primitive reference stores the bounds of the primitive and its ID. */ struct __aligned(32) PrimRef { __forceinline PrimRef () {} __forceinline PrimRef (const BBox3fa& bounds, unsigned int geomID, unsigned int primID) { lower = Vec3fx(bounds.lower, geomID); upper = Vec3fx(bounds.upper, primID); } __forceinline PrimRef (const BBox3fa& bounds, size_t id) { #if defined(__64BIT__) lower = Vec3fx(bounds.lower, (unsigned)(id & 0xFFFFFFFF)); upper = Vec3fx(bounds.upper, (unsigned)((id >> 32) & 0xFFFFFFFF)); #else lower = Vec3fx(bounds.lower, (unsigned)id); upper = Vec3fx(bounds.upper, (unsigned)0); #endif } /*! calculates twice the center of the primitive */ __forceinline const Vec3fa center2() const { return lower+upper; } /*! return the bounding box of the primitive */ __forceinline const BBox3fa bounds() const { return BBox3fa(lower,upper); } /*! size for bin heuristic is 1 */ __forceinline unsigned size() const { return 1; } /*! 
returns bounds and centroid used for binning */ __forceinline void binBoundsAndCenter(BBox3fa& bounds_o, Vec3fa& center_o) const { bounds_o = bounds(); center_o = embree::center2(bounds_o); } /*! returns the geometry ID */ __forceinline unsigned geomID() const { return lower.a; } /*! returns the primitive ID */ __forceinline unsigned primID() const { return upper.a; } /*! returns an size_t sized ID */ __forceinline size_t ID() const { #if defined(__64BIT__) return size_t(lower.u) + (size_t(upper.u) << 32); #else return size_t(lower.u); #endif } /*! special function for operator< */ __forceinline uint64_t ID64() const { return (((uint64_t)primID()) << 32) + (uint64_t)geomID(); } /*! allows sorting the primrefs by ID */ friend __forceinline bool operator<(const PrimRef& p0, const PrimRef& p1) { return p0.ID64() < p1.ID64(); } /*! Outputs primitive reference to a stream. */ friend __forceinline embree_ostream operator<<(embree_ostream cout, const PrimRef& ref) { return cout << "{ lower = " << ref.lower << ", upper = " << ref.upper << ", geomID = " << ref.geomID() << ", primID = " << ref.primID() << " }"; } public: Vec3fx lower; //!< lower bounds and geomID Vec3fx upper; //!< upper bounds and primID }; /*! 
fast exchange for PrimRefs */ __forceinline void xchg(PrimRef& a, PrimRef& b) { std::swap(a,b); } } level-zero-raytracing-support-1.2.3/rtbuild/builders/primrefgen_presplit.h000066400000000000000000000366451514453371700272130ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "../algorithms/parallel_reduce.h" #include "../algorithms/parallel_sort.h" #include "../builders/heuristic_spatial.h" #include "../builders/splitter.h" #include "../algorithms/parallel_partition.h" #include "../algorithms/parallel_for_for.h" #include "../algorithms/parallel_for_for_prefix_sum.h" #define DBG_PRESPLIT(x) #define CHECK_PRESPLIT(x) #define GRID_SIZE 1024 //#define MAX_PRESPLITS_PER_PRIMITIVE_LOG 6 #define MAX_PRESPLITS_PER_PRIMITIVE_LOG 5 #define MAX_PRESPLITS_PER_PRIMITIVE (1<= vint4(gupper),vint4(ilower),vint4(iupper)); /* compute a morton code for the lower and upper grid coordinates. */ const unsigned int lower_code = bitInterleave(ilower.x,ilower.y,ilower.z); const unsigned int upper_code = bitInterleave(iupper.x,iupper.y,iupper.z); /* if all bits are equal then we cannot split */ if (unlikely(lower_code == upper_code)) return false; /* compute octree level and dimension to perform the split in */ const unsigned int diff = 31 - lzcnt(lower_code^upper_code); const unsigned int level = diff / 3; const unsigned int dim = diff % 3; /* now we compute the grid position of the split */ const unsigned int isplit = iupper[dim] & ~((1<= fsplit); dim_o = dim; fsplit_o = fsplit; return true; } __forceinline Vec2i computeMC(const PrimRef& ref) const { const Vec3fa lower = ref.lower; const Vec3fa upper = ref.upper; const Vec3fa glower = (lower-base)*Vec3fa(scale)+Vec3fa(0.2f); const Vec3fa gupper = (upper-base)*Vec3fa(scale)-Vec3fa(0.2f); Vec3ia ilower(floor(glower)); Vec3ia iupper(floor(gupper)); /* this ignores dimensions that are empty */ iupper = (Vec3ia)select(vint4(glower) >= 
vint4(gupper),vint4(ilower),vint4(iupper)); /* compute a morton code for the lower and upper grid coordinates. */ const unsigned int lower_code = bitInterleave(ilower.x,ilower.y,ilower.z); const unsigned int upper_code = bitInterleave(iupper.x,iupper.y,iupper.z); return Vec2i(lower_code,upper_code); } Vec3fa base; float scale; float extend; }; struct PresplitItem { union { float priority; unsigned int data; }; unsigned int index; __forceinline operator unsigned() const { return data; } template __forceinline static float compute_priority(const ProjectedPrimitiveAreaFunc& primitiveArea, const PrimRef &ref, const Vec2i &mc) { const float area_aabb = area(ref.bounds()); const float area_prim = primitiveArea(ref); if (area_prim == 0.0f) return 0.0f; const unsigned int diff = 32 - lzcnt(mc.x^mc.y); //assert(area_prim <= area_aabb); // may trigger due to numerical issues const float area_diff = max(0.0f, area_aabb - area_prim); //const float priority = powf(area_diff * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff),1.0f/4.0f); const float priority = sqrtf(sqrtf( area_diff * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff) )); //const float priority = sqrtf(sqrtf( area_diff ) ); //const float priority = sqrtfarea_diff; //const float priority = area_diff; // 104 fps !!!!!!!!!! 
//const float priority = 0.2f*area_aabb + 0.8f*area_diff; // 104 fps //const float priority = area_aabb * max(area_aabb/area_prim,32.0f); //const float priority = area_prim; assert(priority >= 0.0f && priority < FLT_LARGE); return priority; } }; inline std::ostream &operator<<(std::ostream &cout, const PresplitItem& item) { return cout << "index " << item.index << " priority " << item.priority; }; #if 1 template void splitPrimitive(const Splitter& splitter, const PrimRef& prim, const unsigned int splitprims, const SplittingGrid& grid, PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE], unsigned int& numSubPrims) { assert(splitprims > 0 && splitprims <= MAX_PRESPLITS_PER_PRIMITIVE); if (splitprims == 1) { assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE); subPrims[numSubPrims++] = prim; } else { unsigned int dim; float fsplit; if (!grid.split_pos(prim, dim, fsplit)) { assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE); subPrims[numSubPrims++] = prim; return; } /* split primitive */ PrimRef left,right; splitter(prim,dim,fsplit,left,right); assert(!left.bounds().empty()); assert(!right.bounds().empty()); const unsigned int splitprims_left = splitprims/2; const unsigned int splitprims_right = splitprims - splitprims_left; splitPrimitive(splitter,left,splitprims_left,grid,subPrims,numSubPrims); splitPrimitive(splitter,right,splitprims_right,grid,subPrims,numSubPrims); } } #else template void splitPrimitive(const Splitter& splitter, const PrimRef& prim, const unsigned int targetSubPrims, const SplittingGrid& grid, PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE], unsigned int& numSubPrims) { assert(targetSubPrims > 0 && targetSubPrims <= MAX_PRESPLITS_PER_PRIMITIVE); auto compare = [] ( const PrimRef& a, const PrimRef& b ) { return area(a.bounds()) < area(b.bounds()); }; subPrims[numSubPrims++] = prim; while (numSubPrims < targetSubPrims) { /* get top heap element */ std::pop_heap(subPrims+0,subPrims+numSubPrims, compare); PrimRef top = subPrims[--numSubPrims]; unsigned int dim; 
float fsplit; if (!grid.split_pos(top, dim, fsplit)) { assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE); subPrims[numSubPrims++] = top; return; } /* split primitive */ PrimRef left,right; splitter(top,dim,fsplit,left,right); assert(!left.bounds().empty()); assert(!right.bounds().empty()); subPrims[numSubPrims++] = left; std::push_heap(subPrims+0, subPrims+numSubPrims, compare); subPrims[numSubPrims++] = right; std::push_heap(subPrims+0, subPrims+numSubPrims, compare); } } #endif template PrimInfo createPrimRefArray_presplit(size_t numPrimRefs, PrimVector& prims, const PrimInfo& pinfo, const SplitPrimitiveFunc& splitPrimitive, const ProjectedPrimitiveAreaFunc& primitiveArea) { static const size_t MIN_STEP_SIZE = 128; /* use correct number of primitives */ size_t numPrimitives = pinfo.size(); const size_t numPrimitivesExt = prims.size(); const size_t numSplitPrimitivesBudget = numPrimitivesExt - numPrimitives; /* allocate double buffer presplit items */ avector preSplitItem0(numPrimitivesExt); avector preSplitItem1(numPrimitivesExt); /* compute grid */ SplittingGrid grid(pinfo.geomBounds); /* init presplit items and get total sum */ const float psum = parallel_reduce( size_t(0), numPrimitives, size_t(MIN_STEP_SIZE), 0.0f, [&](const range& r) -> float { float sum = 0.0f; for (size_t i=r.begin(); i float { return a+b; }); /* compute number of splits per primitive */ const float inv_psum = 1.0f / psum; parallel_for( size_t(0), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range& r) -> void { for (size_t i=r.begin(); i= numPrimitives) return pinfo; size_t numPrimitivesToSplit = numPrimitives - center; assert(preSplitItem0[center].data >= 1.0f); /* sort presplit items in ascending order */ radix_sort_u32(preSplitItem0.data() + center,preSplitItem1.data() + center,numPrimitivesToSplit,1024); CHECK_PRESPLIT( parallel_for( size_t(center+1), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range& r) -> void { for (size_t i=r.begin(); i& t) -> size_t { size_t sum = 0; for 
(size_t i=t.begin(); i= 1 && splitprims <= MAX_PRESPLITS_PER_PRIMITIVE); unsigned int numSubPrims = 0; PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE]; splitPrimitive(prims[primrefID],splitprims,grid,subPrims,numSubPrims); assert(numSubPrims); numSubPrims--; // can reuse slot sum+=numSubPrims; preSplitItem0[i].data = (numSubPrims << 16) | splitprims; primOffset0[i-center] = numSubPrims; } return sum; },[](const size_t& a, const size_t& b) -> size_t { return a+b; }); /* if we are over budget, need to shrink the range */ if (totalNumSubPrims > numSplitPrimitivesBudget) { size_t new_center = numPrimitives-1; size_t sum = 0; for (;new_center>=center;new_center--) { const unsigned int numSubPrims = preSplitItem0[new_center].data >> 16; if (unlikely(sum + numSubPrims >= numSplitPrimitivesBudget)) break; sum += numSubPrims; } new_center++; primOffset0 += new_center - center; numPrimitivesToSplit -= new_center - center; center = new_center; assert(numPrimitivesToSplit == (numPrimitives - center)); } /* parallel prefix sum to compute offsets for storing sub-primitives */ const unsigned int offset = parallel_prefix_sum(primOffset0,primOffset1,numPrimitivesToSplit,(unsigned int)0,std::plus()); assert(numPrimitives+offset <= numPrimitivesExt); /* iterate over range, and split primitives into sub primitives and append them to prims array */ parallel_for( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range& rn) -> void { for (size_t j=rn.begin(); j= 1 && splitprims <= MAX_PRESPLITS_PER_PRIMITIVE); unsigned int numSubPrims = 0; PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE]; splitPrimitive(prims[primrefID],splitprims,grid,subPrims,numSubPrims); const unsigned int numSubPrimsExpected MAYBE_UNUSED = preSplitItem0[j].data >> 16; assert(numSubPrims-1 == numSubPrimsExpected); const size_t newID = numPrimitives + primOffset1[j-center]; assert(newID+numSubPrims-1 <= numPrimitivesExt); prims[primrefID] = subPrims[0]; for (size_t i=1;i& r) -> PrimInfo { PrimInfo p(empty); 
for (size_t j=r.begin(); j PrimInfo { return PrimInfo::merge(a,b); }); assert(pinfo1.size() == numPrimitives); return pinfo1; } } } level-zero-raytracing-support-1.2.3/rtbuild/builders/splitter.h000066400000000000000000000101171514453371700247630ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "../builders/primref.h" namespace embree { namespace isa { template __forceinline void splitPolygon(const BBox3fa& bounds, const size_t dim, const float pos, const Vec3fa (&v)[N+1], BBox3fa& left_o, BBox3fa& right_o) { BBox3fa left = empty, right = empty; /* clip triangle to left and right box by processing all edges */ for (size_t i=0; i= pos) right.extend(v0); // this point is on right side if ((v0d < pos && pos < v1d) || (v1d < pos && pos < v0d)) // the edge crosses the splitting location { assert((v1d-v0d) != 0.0f); const float inv_length = 1.0f/(v1d-v0d); const Vec3fa c = madd(Vec3fa((pos-v0d)*inv_length),v1-v0,v0); left.extend(c); right.extend(c); } } /* clip against current bounds */ left_o = intersect(left,bounds); right_o = intersect(right,bounds); } template __forceinline void splitPolygon(const BBox3fa& bounds, const size_t dim, const float pos, const Vec3fa (&v)[N+1], const Vec3fa (&inv_length)[N], BBox3fa& left_o, BBox3fa& right_o) { BBox3fa left = empty, right = empty; /* clip triangle to left and right box by processing all edges */ for (size_t i=0; i= pos) right.extend(v0); // this point is on right side if ((v0d < pos && pos < v1d) || (v1d < pos && pos < v0d)) // the edge crosses the splitting location { assert((v1d-v0d) != 0.0f); const Vec3fa c = madd(Vec3fa((pos-v0d)*inv_length[i][dim]),v1-v0,v0); left.extend(c); right.extend(c); } } /* clip against current bounds */ left_o = intersect(left,bounds); right_o = intersect(right,bounds); } template __forceinline void splitPolygon(const PrimRef& prim, const size_t dim, const float pos, const Vec3fa (&v)[N+1], PrimRef& left_o, 
PrimRef& right_o) { BBox3fa left = empty, right = empty; for (size_t i=0; i= pos) right.extend(v0); // this point is on right side if ((v0d < pos && pos < v1d) || (v1d < pos && pos < v0d)) // the edge crosses the splitting location { assert((v1d-v0d) != 0.0f); const float inv_length = 1.0f/(v1d-v0d); const Vec3fa c = madd(Vec3fa((pos-v0d)*inv_length),v1-v0,v0); left.extend(c); right.extend(c); } } /* clip against current bounds */ new (&left_o ) PrimRef(intersect(left ,prim.bounds()),prim.geomID(), prim.primID()); new (&right_o) PrimRef(intersect(right,prim.bounds()),prim.geomID(), prim.primID()); } } } level-zero-raytracing-support-1.2.3/rtbuild/export.linux.map000066400000000000000000000000411514453371700243040ustar00rootroot00000000000000{ global: ze*; local: *; }; level-zero-raytracing-support-1.2.3/rtbuild/leaf.h000066400000000000000000000677051514453371700222320ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #if defined(ZE_RAYTRACING) #include "sys/sysinfo.h" #include "sys/vector.h" #include "math/vec2.h" #include "math/vec3.h" #include "math/bbox.h" #include "math/affinespace.h" #else #include "../../../common/sys/sysinfo.h" #include "../../../common/sys/vector.h" #include "../../../common/math/vec2.h" #include "../../../common/math/vec3.h" #include "../../../common/math/bbox.h" #include "../../../common/math/lbbox.h" #include "../../../common/math/affinespace.h" #endif #include "node_type.h" #include #include namespace embree { /* Internal representation for GeometryFlags. */ #undef OPAQUE // Windows defines OPAQUE in gdi.h enum class GeometryFlags : uint32_t { NONE = 0x0, OPAQUE = 0x1 }; inline bool operator& (GeometryFlags a, GeometryFlags b) { return (int(a) & int(b)) ? 
true : false; } /* output operator for GeometryFlags */ inline std::ostream& operator<<(std::ostream& cout, const GeometryFlags& gflags) { #if !defined(__SYCL_DEVICE_ONLY__) if (gflags == GeometryFlags::NONE) return cout << "NONE"; if (gflags & GeometryFlags::OPAQUE) cout << "OPAQUE "; #endif return cout; } /* This structure is a header for each leaf type. Only the InstanceLeaf has a slightly different header. All primitives inside a leaf are of the same geometry, thus have the same geometry index (geomIndex), the same shader index (shaderIndex), the same geometry mask (geomMask), and the same geometry flags (geomFlags). The shaderIndex is used to calculate the shader record to invoke. This is an extension to DXR where the geomIndex is used for that purpose. For DXR we can always set the shaderIndex to be equal to the geomIndex. */ struct PrimLeafDesc { static const uint32_t MAX_GEOM_INDEX = 0x3FFFFFFF; static const uint32_t MAX_SHADER_INDEX = 0xFFFFFF; enum Type : uint32_t { TYPE_NONE = 0, /* For a node type of NODE_TYPE_PROCEDURAL we support enabling * and disabling the opaque/non_opaque culling. 
*/ TYPE_OPACITY_CULLING_ENABLED = 0, TYPE_OPACITY_CULLING_DISABLED = 1 }; PrimLeafDesc() {} PrimLeafDesc(uint32_t shaderIndex, uint32_t geomIndex, GeometryFlags gflags, uint32_t geomMask, Type type = TYPE_NONE) : shaderIndex(shaderIndex), geomMask(geomMask), geomIndex(geomIndex), type(type), geomFlags((uint32_t)gflags) { if (shaderIndex > MAX_SHADER_INDEX) throw std::runtime_error("too large shader ID"); if (geomIndex > MAX_GEOM_INDEX) throw std::runtime_error("too large geometry ID"); } /* compares two PrimLeafDesc's for equality */ friend bool operator ==(const PrimLeafDesc& a, const PrimLeafDesc& b) { if (a.geomIndex != b.geomIndex) return false; assert(a.shaderIndex == b.shaderIndex); assert(a.geomMask == b.geomMask); assert(a.type == b.type); assert(a.geomFlags == b.geomFlags); return true; } friend bool operator !=(const PrimLeafDesc& a, const PrimLeafDesc& b) { return !(a == b); } void print(std::ostream& cout, uint32_t depth) const { #if !defined(__SYCL_DEVICE_ONLY__) cout << tab(depth) << "PrimLeafDesc {" << std::endl; cout << tab(depth) << " shaderIndex = " << shaderIndex << std::endl; cout << tab(depth) << " geomMask = " << std::bitset<8>(geomMask) << std::endl; cout << tab(depth) << " geomFlags = " << getGeomFlags() << std::endl; cout << tab(depth) << " geomIndex = " << geomIndex << std::endl; cout << tab(depth) << "}"; #endif } friend inline std::ostream& operator<<(std::ostream& cout, const PrimLeafDesc& desc) { desc.print(cout,0); return cout; } /* Checks if opaque culling is enabled. 
*/ bool opaqueCullingEnabled() const { return type == TYPE_OPACITY_CULLING_ENABLED; } /* procedural instances store some valid shader index */ bool isProceduralInstance() const { return shaderIndex != 0xFFFFFF; } /* returns geometry flags */ GeometryFlags getGeomFlags() const { return (GeometryFlags) geomFlags; } public: uint32_t shaderIndex : 24; // shader index used for shader record calculations uint32_t geomMask : 8; // geometry mask used for ray masking uint32_t geomIndex : 29; // the geometry index specifies the n'th geometry of the scene /*Type*/ uint32_t type : 1; // enable/disable culling for procedurals and instances /*GeometryFlags*/ uint32_t geomFlags : 2; // geometry flags of this geometry }; /* The QuadLeaf structure stores a single quad. A quad is a triangle pair with a shared edge. The first triangle has vertices v0,v1,v2, while the second triangle has vertices v[j0],v[j1],v[j2], thus the second triangle used local triangle indices. */ struct QuadLeaf { QuadLeaf() {} QuadLeaf (Vec3f v0, Vec3f v1, Vec3f v2, Vec3f v3, uint8_t j0, uint8_t j1, uint8_t j2, uint32_t shaderIndex, uint32_t geomIndex, uint32_t primIndex0, uint32_t primIndex1, GeometryFlags gflags, uint32_t geomMask, bool last) : leafDesc(shaderIndex,geomIndex,gflags,geomMask), primIndex0(primIndex0), primIndex1Delta(primIndex1-primIndex0), pad1(0), j0(j0),j1(j1),j2(j2),last(last),pad(0), v0(v0), v1(v1), v2(v2), v3(v3) { /* There are some constraints on the primitive indices. The * second primitive index always has to be the largest and the * distance between them can be at most 0xFFFF as we use 16 bits * to encode that difference. */ assert(primIndex0 <= primIndex1 && primIndex1 - primIndex0 < 0xFFFF); } /* returns the i'th vertex */ __forceinline Vec3f vertex(size_t i) const { assert(i < 4); return (&v0)[i]; } /* Checks if the specified triange is the last inside a leaf * list. 
*/ bool isLast(uint32_t i = 1) const { assert(i<2); if (i == 0) return false; // the first triangle is never the last else return last; // the last bit tags the second triangle to be last } /* Checks if the second triangle exists. */ bool valid2() const { return !(j0 == 0 && j1 == 0 && j2 == 0); } /* Calculates the number of stored triangles. */ size_t size() const { return 1 + valid2(); } /* Calculates the effectively used bytes. If we store only one * triangle we waste the storage of one vertex. */ size_t usedBytes() const { if (valid2()) return sizeof(QuadLeaf); else return sizeof(QuadLeaf)-sizeof(Vec3f); } /* Calculates to delta to add to primIndex0 to get the primitive * index of the i'th triangle. */ uint32_t primIndexDelta(uint32_t i) const { assert(i<2); return i*primIndex1Delta; } /* Calculates the primitive index of the i'th triangle. */ uint32_t primIndex(uint32_t i) const { assert(i<2); return primIndex0 + primIndexDelta(i); } /* Quad mode is a special mode where the uv's over the quad are * defined over the entire range [0,1]x[0,1]. */ bool quadMode() const { return primIndex1Delta == 0; } /* Calculates the bounding box of this leaf. 
*/ BBox3f bounds() const { BBox3f b = empty; b.extend(v0); b.extend(v1); b.extend(v2); if (valid2()) b.extend(v3); return b; } /* output of quad leaf */ void print(std::ostream& cout, uint32_t depth) const { #if !defined(__SYCL_DEVICE_ONLY__) cout << tab(depth) << "QuadLeaf {" << std::endl; cout << tab(depth) << " addr = " << this << std::endl; cout << tab(depth) << " shaderIndex = " << leafDesc.shaderIndex << std::endl; cout << tab(depth) << " geomMask = " << std::bitset<8>(leafDesc.geomMask) << std::endl; cout << tab(depth) << " geomFlags = " << leafDesc.getGeomFlags() << std::endl; cout << tab(depth) << " geomIndex = " << leafDesc.geomIndex << std::endl; cout << tab(depth) << " triangle0 = { " << std::endl; cout << tab(depth) << " primIndex = " << primIndex(0) << std::endl; cout << tab(depth) << " v0 = " << v0 << std::endl; cout << tab(depth) << " v1 = " << v1 << std::endl; cout << tab(depth) << " v2 = " << v2 << std::endl; cout << tab(depth) << " }" << std::endl; if (valid2()) { cout << tab(depth) << " triangle1 = { " << std::endl; cout << tab(depth) << " primIndex = " << primIndex(1) << std::endl; cout << tab(depth) << " v0 = " << vertex(j0) << std::endl; cout << tab(depth) << " v1 = " << vertex(j1) << std::endl; cout << tab(depth) << " v2 = " << vertex(j2) << std::endl; cout << tab(depth) << " }" << std::endl; } cout << tab(depth) << "}"; #endif } /* output operator for QuadLeaf */ friend inline std::ostream& operator<<(std::ostream& cout, const QuadLeaf& leaf) { leaf.print(cout,0); return cout; } public: PrimLeafDesc leafDesc; // the leaf header uint32_t primIndex0; // primitive index of first triangle struct { uint32_t primIndex1Delta : 5; // delta encoded primitive index of second triangle uint32_t pad1 : 11; // MBZ uint32_t j0 : 2; // specifies first vertex of second triangle uint32_t j1 : 2; // specified second vertex of second triangle uint32_t j2 : 2; // specified third vertex of second triangle uint32_t last : 1; // true if the second triangle is the 
last triangle in a leaf list uint32_t pad : 9; // unused bits }; Vec3f v0; // first vertex of first triangle Vec3f v1; // second vertex of first triangle Vec3f v2; // third vertex of first triangle Vec3f v3; // forth vertex used for second triangle }; static_assert(sizeof(QuadLeaf) == 64, "QuadLeaf must be 64 bytes large"); /* Internal instance flags definition. */ struct InstanceFlags { enum Flags : uint8_t { NONE = 0x0, TRIANGLE_CULL_DISABLE = 0x1, // disables culling of front and back facing triangles through ray flags TRIANGLE_FRONT_COUNTERCLOCKWISE = 0x2, // for mirroring transformations the instance can switch front and backface of triangles FORCE_OPAQUE = 0x4, // forces all primitives inside this instance to be opaque FORCE_NON_OPAQUE = 0x8 // forces all primitives inside this instane to be non-opaque }; InstanceFlags() {} InstanceFlags(Flags rflags) : flags(rflags) {} InstanceFlags(uint8_t rflags) : flags((Flags)rflags) {} operator Flags () const { return flags; } /* output operator for InstanceFlags */ friend inline std::ostream& operator<<(std::ostream& cout, const InstanceFlags& iflags) { #if !defined(__SYCL_DEVICE_ONLY__) if (iflags == InstanceFlags::NONE) return cout << "NONE"; if (iflags.triangle_cull_disable) cout << "TRIANGLE_CULL_DISABLE "; if (iflags.triangle_front_counterclockwise) cout << "TRIANGLE_FRONT_COUNTERCLOCKWISE "; if (iflags.force_opaque) cout << "FORCE_OPAQUE "; if (iflags.force_non_opaque) cout << "FORCE_NON_OPAQUE "; #endif return cout; } public: union { Flags flags; struct { bool triangle_cull_disable : 1; bool triangle_front_counterclockwise : 1; bool force_opaque : 1; bool force_non_opaque : 1; }; }; }; inline InstanceFlags::Flags operator| (InstanceFlags::Flags a,InstanceFlags::Flags b) { return (InstanceFlags::Flags)(int(a) | int(b)); } /* The instance leaf represent an instance. 
It essentially stores transformation matrices (local to world as well as world to local) of the instance as well as a pointer to the start node of some BVH. The instance leaf consists of two parts, part0 (first 64 bytes) and part1 (second 64 bytes). Part0 will only get accessed by hardware and stores the world to local transformation as well as the BVH node to start traversal. Part1 stores additional data that is only read by the shader, e.g. it stores the local to world transformation of the instance. The layout of the first part of the InstanceLeaf is compatible with a ProceduralLeaf, thus we can use the same layout for software instancing if we want. */ struct InstanceLeaf { InstanceLeaf() {} InstanceLeaf (AffineSpace3f obj2world, uint64_t startNodePtr, uint32_t instID, uint32_t instUserID, uint8_t instMask) { part0.shaderIndex = 0; //InstShaderRecordID; part0.geomMask = instMask; part0.instanceContributionToHitGroupIndex = 0; //desc.InstanceContributionToHitGroupIndex; part0.pad0 = 0; part0.type = PrimLeafDesc::TYPE_OPACITY_CULLING_ENABLED; part0.geomFlags = (uint32_t) GeometryFlags::NONE; part0.startNodePtr = startNodePtr; //assert(int64_t(startNodePtr << 16) >> 16 == startNodePtr); // check that this is valid 48bit address part0.instFlags = (InstanceFlags) 0; part0.pad1 = 0; part1.instanceID = instUserID; part1.instanceIndex = instID; part1.bvhPtr = (uint64_t) 0; part1.pad = 0; part1.obj2world_vx = obj2world.l.vx; part1.obj2world_vy = obj2world.l.vy; part1.obj2world_vz = obj2world.l.vz; part0.obj2world_p = obj2world.p; const AffineSpace3f world2obj = rcp(obj2world); part0.world2obj_vx = world2obj.l.vx; part0.world2obj_vy = world2obj.l.vy; part0.world2obj_vz = world2obj.l.vz; part1.world2obj_p = world2obj.p; } /* Returns the address of the start node pointer. We need this * address to calculate relocation tables when dumping the BVH to * disk. 
*/ const uint64_t startNodePtrAddr() const { return (uint64_t)((char*)&part0 + 8); } /* Returns the address of the BVH that contains the start node. */ const uint64_t bvhPtrAddr() const { return (uint64_t)&part1; } /* returns the world to object space transformation matrix. */ const AffineSpace3f World2Obj() const { return AffineSpace3f(part0.world2obj_vx,part0.world2obj_vy,part0.world2obj_vz,part1.world2obj_p); } /* returns the object to world space transformation matrix. */ const AffineSpace3f Obj2World() const { return AffineSpace3f(part1.obj2world_vx,part1.obj2world_vy,part1.obj2world_vz,part0.obj2world_p); } /* output operator for instance leaf */ void print (std::ostream& cout, uint32_t depth) const { #if !defined(__SYCL_DEVICE_ONLY__) if (!part0.type) cout << tab(depth) << "InstanceLeaf {" << std::endl; else cout << tab(depth) << "ProceduralInstanceLeaf {" << std::endl; cout << tab(depth) << " addr = " << this << std::endl; cout << tab(depth) << " shaderIndex = " << part0.shaderIndex << std::endl; cout << tab(depth) << " geomMask = " << std::bitset<8>(part0.geomMask) << std::endl; cout << tab(depth) << " geomIndex = " << part1.instanceIndex << std::endl; cout << tab(depth) << " instanceID = " << part1.instanceID << std::endl; cout << tab(depth) << " instFlags = " << InstanceFlags(part0.instFlags) << std::endl; cout << tab(depth) << " startNodePtr = " << (void*)(size_t)part0.startNodePtr << std::endl; cout << tab(depth) << " obj2world.vx = " << part1.obj2world_vx << std::endl; cout << tab(depth) << " obj2world.vy = " << part1.obj2world_vy << std::endl; cout << tab(depth) << " obj2world.vz = " << part1.obj2world_vz << std::endl; cout << tab(depth) << " obj2world.p = " << part0.obj2world_p << std::endl; cout << tab(depth) << " world2obj.vx = " << part0.world2obj_vx << std::endl; cout << tab(depth) << " world2obj.vy = " << part0.world2obj_vy << std::endl; cout << tab(depth) << " world2obj.vz = " << part0.world2obj_vz << std::endl; cout << tab(depth) << " 
world2obj.p = " << part1.world2obj_p << std::endl; cout << tab(depth) << " instanceContributionToHitGroupIndex = " << part0.instanceContributionToHitGroupIndex << std::endl; cout << tab(depth) << "}"; #endif } /* output operator for InstanceLeaf */ friend inline std::ostream& operator<<(std::ostream& cout, const InstanceLeaf& leaf) { leaf.print(cout,0); return cout; } /* first 64 bytes accessed during traversal by hardware */ struct Part0 { /* Checks if opaque culling is enabled. */ bool opaqueCullingEnabled() const { return type == PrimLeafDesc::TYPE_OPACITY_CULLING_ENABLED; } public: uint32_t shaderIndex : 24; // shader index used to calculate instancing shader in case of software instancing uint32_t geomMask : 8; // geometry mask used for ray masking uint32_t instanceContributionToHitGroupIndex : 24; uint32_t pad0 : 5; /* the following two entries are only used for procedural instances */ /*PrimLeafDesc::Type*/ uint32_t type : 1; // enables/disables opaque culling /*GeometryFlags*/ uint32_t geomFlags : 2; // unused for instances uint64_t startNodePtr : 48; // start node where to continue traversal of the instanced object uint64_t instFlags : 8; // flags for the instance (see InstanceFlags) uint64_t pad1 : 8; // unused bits Vec3f world2obj_vx; // 1st column of Worl2Obj transform Vec3f world2obj_vy; // 2nd column of Worl2Obj transform Vec3f world2obj_vz; // 3rd column of Worl2Obj transform Vec3f obj2world_p; // translation of Obj2World transform (on purpose in first 64 bytes) } part0; /* second 64 bytes accessed during shading */ struct Part1 { uint64_t bvhPtr : 48; // pointer to BVH where start node belongs too uint64_t pad : 16; // unused bits uint32_t instanceID; // user defined value per DXR spec uint32_t instanceIndex; // geometry index of the instance (n'th geometry in scene) Vec3f obj2world_vx; // 1st column of Obj2World transform Vec3f obj2world_vy; // 2nd column of Obj2World transform Vec3f obj2world_vz; // 3rd column of Obj2World transform Vec3f 
world2obj_p; // translation of World2Obj transform } part1; }; static_assert(sizeof(InstanceLeaf) == 128, "InstanceLeaf must be 128 bytes large"); /* Instance leaf layout for Xe3 */ struct InstanceLeafV2 { InstanceLeafV2() {} InstanceLeafV2 (AffineSpace3f obj2world, uint64_t startNodePtr, uint32_t instID, uint32_t instUserID, uint8_t instMask) { part0.instanceContributionToHitGroupIndex = 0; part0.geomMask = instMask; part0.instFlags = (InstanceFlags) 0; part0.ComparisonMode = 0; part0.ComparisonValue = 0; part0.pad0 = 0; part0.subType = SUB_TYPE_PROCEDURAL; part0.pad1 = 0; part0.DisableOpacityCull = PrimLeafDesc::TYPE_OPACITY_CULLING_ENABLED; part0.OpaqueGeometry = 0; part0.IgnoreRayMultiplier = 0; part0.startNodePtr = startNodePtr; part1.instanceID = instUserID; part1.instanceIndex = instID; part1.bvhPtr = (uint64_t) 0; part1.pad = 0; part1.obj2world_vx = obj2world.l.vx; part1.obj2world_vy = obj2world.l.vy; part1.obj2world_vz = obj2world.l.vz; part0.obj2world_p = obj2world.p; const AffineSpace3f world2obj = rcp(obj2world); part0.world2obj_vx = world2obj.l.vx; part0.world2obj_vy = world2obj.l.vy; part0.world2obj_vz = world2obj.l.vz; part1.world2obj_p = world2obj.p; } /* Returns the address of the start node pointer. We need this * address to calculate relocation tables when dumping the BVH to * disk. */ const uint64_t startNodePtrAddr() const { return (uint64_t)((char*)&part0 + 8); } /* Returns the address of the BVH that contains the start node. */ const uint64_t bvhPtrAddr() const { return (uint64_t)&part1; } /* returns the world to object space transformation matrix. */ const AffineSpace3f World2Obj() const { return AffineSpace3f(part0.world2obj_vx,part0.world2obj_vy,part0.world2obj_vz,part1.world2obj_p); } /* returns the object to world space transformation matrix. 
*/ const AffineSpace3f Obj2World() const { return AffineSpace3f(part1.obj2world_vx,part1.obj2world_vy,part1.obj2world_vz,part0.obj2world_p); } /* output operator for instance leaf */ void print (std::ostream& cout, uint32_t depth) const { #if !defined(__SYCL_DEVICE_ONLY__) cout << tab(depth) << "InstanceLeaf {" << std::endl; cout << tab(depth) << " addr = " << this << std::endl; cout << tab(depth) << " geomMask = " << std::bitset<8>(part0.geomMask) << std::endl; cout << tab(depth) << " geomIndex = " << part1.instanceIndex << std::endl; cout << tab(depth) << " instanceID = " << part1.instanceID << std::endl; cout << tab(depth) << " instFlags = " << InstanceFlags(part0.instFlags) << std::endl; cout << tab(depth) << " startNodePtr = " << (void*)(size_t)part0.startNodePtr << std::endl; cout << tab(depth) << " obj2world.vx = " << part1.obj2world_vx << std::endl; cout << tab(depth) << " obj2world.vy = " << part1.obj2world_vy << std::endl; cout << tab(depth) << " obj2world.vz = " << part1.obj2world_vz << std::endl; cout << tab(depth) << " obj2world.p = " << part0.obj2world_p << std::endl; cout << tab(depth) << " world2obj.vx = " << part0.world2obj_vx << std::endl; cout << tab(depth) << " world2obj.vy = " << part0.world2obj_vy << std::endl; cout << tab(depth) << " world2obj.vz = " << part0.world2obj_vz << std::endl; cout << tab(depth) << " world2obj.p = " << part1.world2obj_p << std::endl; cout << tab(depth) << " instanceContributionToHitGroupIndex = " << part0.instanceContributionToHitGroupIndex << std::endl; cout << tab(depth) << "}"; #endif } /* output operator for InstanceLeaf */ friend inline std::ostream& operator<<(std::ostream& cout, const InstanceLeafV2& leaf) { leaf.print(cout,0); return cout; } /* first 64 bytes accessed during traversal by hardware */ struct Part0 { public: uint32_t instanceContributionToHitGroupIndex : 24; // Xe3: instance contribution to hit group index uint32_t geomMask : 8; // Xe1+: geometry mask used for ray masking uint32_t instFlags : 8; 
// Xe3: flags for the instance (see InstanceFlags) uint32_t ComparisonMode : 1; // Xe3: 0 for <=, 1 for > comparison uint32_t ComparisonValue : 7; // Xe3: to be compared with ray.ComparionMask uint32_t pad0 : 8; // reserved (MBZ) uint32_t subType : 3; // Xe3: geometry sub-type uint32_t pad1 : 2; // reserved (MBZ) uint32_t DisableOpacityCull : 1; // Xe1+: disables opacity culling uint32_t OpaqueGeometry : 1; // Xe1+: determines if geometry is opaque uint32_t IgnoreRayMultiplier : 1; // Xe3: ignores ray geometry multiplier uint64_t startNodePtr; // Xe3: 64 bit start node where of the instanced object Vec3f world2obj_vx; // 1st column of Worl2Obj transform Vec3f world2obj_vy; // 2nd column of Worl2Obj transform Vec3f world2obj_vz; // 3rd column of Worl2Obj transform Vec3f obj2world_p; // translation of Obj2World transform (on purpose in first 64 bytes) } part0; /* second 64 bytes accessed during shading */ struct Part1 { uint64_t bvhPtr : 48; // pointer to BVH where start node belongs too uint64_t pad : 16; // unused bits uint32_t instanceID; // user defined value per DXR spec uint32_t instanceIndex; // geometry index of the instance (n'th geometry in scene) Vec3f obj2world_vx; // 1st column of Obj2World transform Vec3f obj2world_vy; // 2nd column of Obj2World transform Vec3f obj2world_vz; // 3rd column of Obj2World transform Vec3f world2obj_p; // translation of World2Obj transform } part1; }; static_assert(sizeof(InstanceLeafV2) == 128, "InstanceLeafV2 must be 128 bytes large"); /* Leaf type for procedural geometry. This leaf only contains the leaf header (which identifices the geometry) and a list of primitive indices. The BVH will typically reference only some of the primitives stores inside this leaf. The range is specified by a start primitive and the last primitive is tagged with a bit. */ struct ProceduralLeaf { static const uint32_t N = 13; /* Creates an empty procedural leaf. 
*/ ProceduralLeaf () : leafDesc(PrimLeafDesc::MAX_SHADER_INDEX,PrimLeafDesc::MAX_GEOM_INDEX,GeometryFlags::NONE,0), numPrimitives(0), pad(0), last(0) { for (auto& id : _primIndex) id = 0xFFFFFFFF; } /* Creates a procedural leaf with one primitive. More primitives * of the same geometry can get added later using the add * function. */ ProceduralLeaf (PrimLeafDesc leafDesc, uint32_t primIndex, bool last) : leafDesc(leafDesc), numPrimitives(1), pad(0), last(last ? 0xFFFFFFFF : 0xFFFFFFFE) { for (auto& id : _primIndex) id = 0xFFFFFFFF; _primIndex[0] = primIndex; } /* returns the number of primitives stored inside this leaf */ uint32_t size() const { return numPrimitives; } /* Calculates the effectively used bytes. */ size_t usedBytes() const { /*if (leafDesc.isProceduralInstance()) return sizeof(InstanceLeaf); else*/ return sizeof(PrimLeafDesc)+4+4*numPrimitives; } /* if possible adds a new primitive to this leaf */ bool add(PrimLeafDesc leafDesc_in, uint32_t primIndex_in, bool last_in) { assert(primIndex_in != 0xFFFFFFFF); if (numPrimitives >= N) return false; if (!numPrimitives) leafDesc = leafDesc_in; if (leafDesc != leafDesc_in) return false; _primIndex[numPrimitives] = primIndex_in; if (last_in) last |= 1 << numPrimitives; else last &= ~(1 << numPrimitives); numPrimitives++; return true; } /* returns the primitive index of the i'th primitive */ uint32_t primIndex(uint32_t i) const { assert(i < N); return _primIndex[i]; } /* checks if the i'th primitive is the last in a leaf list */ bool isLast(uint32_t i) const { if (i >= N) return true; // just to make some verify tests happy else return (last >> i) & 1; } /* output operator for procedural leaf */ void print (std::ostream& cout, uint32_t i, uint32_t depth) const { #if !defined(__SYCL_DEVICE_ONLY__) cout << tab(depth) << "ProceduralLeaf {" << std::endl; cout << tab(depth) << " addr = " << this << std::endl; cout << tab(depth) << " slot = " << i << std::endl; if (i < N) { cout << tab(depth) << " shaderIndex = " << 
leafDesc.shaderIndex << std::endl; cout << tab(depth) << " geomMask = " << std::bitset<8>(leafDesc.geomMask) << std::endl; cout << tab(depth) << " geomFlags = " << leafDesc.getGeomFlags() << std::endl; cout << tab(depth) << " geomIndex = " << leafDesc.geomIndex << std::endl; cout << tab(depth) << " primIndex = " << primIndex(i) << std::endl; } else { cout << tab(depth) << " INVALID" << std::endl; } cout << tab(depth) << "}"; #endif } public: PrimLeafDesc leafDesc; // leaf header identifying the geometry uint32_t numPrimitives : 4; // number of stored primitives uint32_t pad : 32-4-N; uint32_t last : N; // bit vector with a last bit per primitive uint32_t _primIndex[N]; // primitive indices of all primitives stored inside the leaf }; static_assert(sizeof(ProceduralLeaf) == 64, "ProceduralLeaf must be 64 bytes large"); } level-zero-raytracing-support-1.2.3/rtbuild/math/000077500000000000000000000000001514453371700220645ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/rtbuild/math/CMakeLists.txt000066400000000000000000000007261514453371700246310ustar00rootroot00000000000000## Copyright 2009-2021 Intel Corporation ## SPDX-License-Identifier: Apache-2.0 ADD_LIBRARY(math STATIC constants.cpp) SET_PROPERTY(TARGET math PROPERTY FOLDER common) SET_PROPERTY(TARGET math APPEND PROPERTY COMPILE_FLAGS " ${FLAGS_LOWEST}") IF (EMBREE_STATIC_LIB) INSTALL(TARGETS math EXPORT math-targets ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT devel) INSTALL(EXPORT math-targets DESTINATION "${EMBREE_CMAKEEXPORT_DIR}" COMPONENT devel) ENDIF() level-zero-raytracing-support-1.2.3/rtbuild/math/affinespace.h000066400000000000000000000231701514453371700245040ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once //#include "linearspace2.h" #include "linearspace3.h" //#include "quaternion.h" #include "bbox.h" //#include "vec4.h" namespace embree { #define VectorT typename L::Vector #define ScalarT typename 
L::Vector::Scalar //////////////////////////////////////////////////////////////////////////////// // Affine Space //////////////////////////////////////////////////////////////////////////////// template struct AffineSpaceT { L l; /*< linear part of affine space */ VectorT p; /*< affine part of affine space */ //////////////////////////////////////////////////////////////////////////////// // Constructors, Assignment, Cast, Copy Operations //////////////////////////////////////////////////////////////////////////////// __forceinline AffineSpaceT ( ) { } __forceinline AffineSpaceT ( const AffineSpaceT& other ) { l = other.l; p = other.p; } __forceinline AffineSpaceT ( const L & other ) { l = other ; p = VectorT(zero); } __forceinline AffineSpaceT& operator=( const AffineSpaceT& other ) { l = other.l; p = other.p; return *this; } __forceinline AffineSpaceT( const VectorT& vx, const VectorT& vy, const VectorT& vz, const VectorT& p ) : l(vx,vy,vz), p(p) {} __forceinline AffineSpaceT( const L& l, const VectorT& p ) : l(l), p(p) {} template __forceinline AffineSpaceT( const AffineSpaceT& s ) : l(s.l), p(s.p) {} //////////////////////////////////////////////////////////////////////////////// // Constants //////////////////////////////////////////////////////////////////////////////// __forceinline AffineSpaceT( ZeroTy ) : l(zero), p(zero) {} __forceinline AffineSpaceT( OneTy ) : l(one), p(zero) {} /*! return matrix for scaling */ static __forceinline AffineSpaceT scale(const VectorT& s) { return L::scale(s); } /*! return matrix for translation */ static __forceinline AffineSpaceT translate(const VectorT& p) { return AffineSpaceT(one,p); } /*! return matrix for rotation, only in 2D */ static __forceinline AffineSpaceT rotate(const ScalarT& r) { return L::rotate(r); } /*! return matrix for rotation around arbitrary point (2D) or axis (3D) */ static __forceinline AffineSpaceT rotate(const VectorT& u, const ScalarT& r) { return L::rotate(u,r); } /*! 
return matrix for rotation around arbitrary axis and point, only in 3D */ static __forceinline AffineSpaceT rotate(const VectorT& p, const VectorT& u, const ScalarT& r) { return translate(+p) * rotate(u,r) * translate(-p); } /*! return matrix for looking at given point, only in 3D */ static __forceinline AffineSpaceT lookat(const VectorT& eye, const VectorT& point, const VectorT& up) { VectorT Z = normalize(point-eye); VectorT U = normalize(cross(up,Z)); VectorT V = normalize(cross(Z,U)); return AffineSpaceT(L(U,V,Z),eye); } }; // template specialization to get correct identity matrix for type AffineSpace3fa template<> __forceinline AffineSpaceT::AffineSpaceT( OneTy ) : l(one), p(0.f, 0.f, 0.f, 1.f) {} //////////////////////////////////////////////////////////////////////////////// // Unary Operators //////////////////////////////////////////////////////////////////////////////// template __forceinline AffineSpaceT operator -( const AffineSpaceT& a ) { return AffineSpaceT(-a.l,-a.p); } template __forceinline AffineSpaceT operator +( const AffineSpaceT& a ) { return AffineSpaceT(+a.l,+a.p); } template __forceinline AffineSpaceT rcp( const AffineSpaceT& a ) { L il = rcp(a.l); return AffineSpaceT(il,-(il*a.p)); } //////////////////////////////////////////////////////////////////////////////// // Binary Operators //////////////////////////////////////////////////////////////////////////////// template __forceinline const AffineSpaceT operator +( const AffineSpaceT& a, const AffineSpaceT& b ) { return AffineSpaceT(a.l+b.l,a.p+b.p); } template __forceinline const AffineSpaceT operator -( const AffineSpaceT& a, const AffineSpaceT& b ) { return AffineSpaceT(a.l-b.l,a.p-b.p); } template __forceinline const AffineSpaceT operator *( const ScalarT & a, const AffineSpaceT& b ) { return AffineSpaceT(a*b.l,a*b.p); } template __forceinline const AffineSpaceT operator *( const AffineSpaceT& a, const AffineSpaceT& b ) { return AffineSpaceT(a.l*b.l,a.l*b.p+a.p); } template 
__forceinline const AffineSpaceT operator /( const AffineSpaceT& a, const AffineSpaceT& b ) { return a * rcp(b); } template __forceinline const AffineSpaceT operator /( const AffineSpaceT& a, const ScalarT & b ) { return a * rcp(b); } template __forceinline AffineSpaceT& operator *=( AffineSpaceT& a, const AffineSpaceT& b ) { return a = a * b; } template __forceinline AffineSpaceT& operator *=( AffineSpaceT& a, const ScalarT & b ) { return a = a * b; } template __forceinline AffineSpaceT& operator /=( AffineSpaceT& a, const AffineSpaceT& b ) { return a = a / b; } template __forceinline AffineSpaceT& operator /=( AffineSpaceT& a, const ScalarT & b ) { return a = a / b; } template __forceinline VectorT xfmPoint (const AffineSpaceT& m, const VectorT& p) { return madd(VectorT(p.x),m.l.vx,madd(VectorT(p.y),m.l.vy,madd(VectorT(p.z),m.l.vz,m.p))); } template __forceinline VectorT xfmVector(const AffineSpaceT& m, const VectorT& v) { return xfmVector(m.l,v); } template __forceinline VectorT xfmNormal(const AffineSpaceT& m, const VectorT& n) { return xfmNormal(m.l,n); } __forceinline const BBox xfmBounds(const AffineSpaceT >& m, const BBox& b) { BBox3fa dst = empty; const Vec3fa p0(b.lower.x,b.lower.y,b.lower.z); dst.extend(xfmPoint(m,p0)); const Vec3fa p1(b.lower.x,b.lower.y,b.upper.z); dst.extend(xfmPoint(m,p1)); const Vec3fa p2(b.lower.x,b.upper.y,b.lower.z); dst.extend(xfmPoint(m,p2)); const Vec3fa p3(b.lower.x,b.upper.y,b.upper.z); dst.extend(xfmPoint(m,p3)); const Vec3fa p4(b.upper.x,b.lower.y,b.lower.z); dst.extend(xfmPoint(m,p4)); const Vec3fa p5(b.upper.x,b.lower.y,b.upper.z); dst.extend(xfmPoint(m,p5)); const Vec3fa p6(b.upper.x,b.upper.y,b.lower.z); dst.extend(xfmPoint(m,p6)); const Vec3fa p7(b.upper.x,b.upper.y,b.upper.z); dst.extend(xfmPoint(m,p7)); return dst; } //////////////////////////////////////////////////////////////////////////////// /// Comparison Operators //////////////////////////////////////////////////////////////////////////////// template 
__forceinline bool operator ==( const AffineSpaceT& a, const AffineSpaceT& b ) { return a.l == b.l && a.p == b.p; } template __forceinline bool operator !=( const AffineSpaceT& a, const AffineSpaceT& b ) { return a.l != b.l || a.p != b.p; } //////////////////////////////////////////////////////////////////////////////// /// Select //////////////////////////////////////////////////////////////////////////////// template __forceinline AffineSpaceT select ( const typename L::Vector::Scalar::Bool& s, const AffineSpaceT& t, const AffineSpaceT& f ) { return AffineSpaceT(select(s,t.l,f.l),select(s,t.p,f.p)); } //////////////////////////////////////////////////////////////////////////////// // Output Operators //////////////////////////////////////////////////////////////////////////////// template static embree_ostream operator<<(embree_ostream cout, const AffineSpaceT& m) { return cout << "{ l = " << m.l << ", p = " << m.p << " }"; } //////////////////////////////////////////////////////////////////////////////// // Template Instantiations //////////////////////////////////////////////////////////////////////////////// //typedef AffineSpaceT AffineSpace2f; typedef AffineSpaceT AffineSpace3f; typedef AffineSpaceT AffineSpace3fa; typedef AffineSpaceT AffineSpace3fx; typedef AffineSpaceT AffineSpace3ff; //typedef AffineSpaceT OrthonormalSpace3f; template using AffineSpace3vf = AffineSpaceT>>>; typedef AffineSpaceT>>> AffineSpace3vf4; typedef AffineSpaceT>>> AffineSpace3vf8; typedef AffineSpaceT>>> AffineSpace3vf16; //template using AffineSpace3vff = AffineSpaceT>>>; //typedef AffineSpaceT>>> AffineSpace3vfa4; //typedef AffineSpaceT>>> AffineSpace3vfa8; //typedef AffineSpaceT>>> AffineSpace3vfa16; ////////////////////////////////////////////////////////////////////////////// /// Interpolation ////////////////////////////////////////////////////////////////////////////// template __forceinline AffineSpaceT lerp(const AffineSpaceT& M0, const AffineSpaceT& M1, const R& t) { 
return AffineSpaceT(lerp(M0.l,M1.l,t),lerp(M0.p,M1.p,t)); } #undef VectorT #undef ScalarT } level-zero-raytracing-support-1.2.3/rtbuild/math/bbox.h000066400000000000000000000260461514453371700231770ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "vec2.h" #include "vec3.h" namespace embree { namespace internal { template __forceinline T divideByTwo(const T& v) { return v / T(2); } template <> __forceinline float divideByTwo(const float& v) { return v * 0.5f; } template <> __forceinline double divideByTwo(const double& v) { return v * 0.5; } } // namespace internal template struct BBox { T lower, upper; //////////////////////////////////////////////////////////////////////////////// /// Construction //////////////////////////////////////////////////////////////////////////////// __forceinline BBox ( ) { } template __forceinline BBox ( const BBox& other ) : lower(other.lower), upper(other.upper) {} __forceinline BBox& operator=( const BBox& other ) { lower = other.lower; upper = other.upper; return *this; } __forceinline BBox ( const T& v ) : lower(v), upper(v) {} __forceinline BBox ( const T& lower, const T& upper ) : lower(lower), upper(upper) {} //////////////////////////////////////////////////////////////////////////////// /// Extending Bounds //////////////////////////////////////////////////////////////////////////////// __forceinline const BBox& extend(const BBox& other) { lower = min(lower,other.lower); upper = max(upper,other.upper); return *this; } __forceinline const BBox& extend(const T & other) { lower = min(lower,other ); upper = max(upper,other ); return *this; } /*! tests if box is empty */ __forceinline bool empty() const { for (int i=0; i upper[i]) return true; return false; } /*! computes the size of the box */ __forceinline T size() const { return upper - lower; } /*! 
computes the center of the box */ __forceinline T center() const { return internal::divideByTwo(lower+upper); } /*! computes twice the center of the box */ __forceinline T center2() const { return lower+upper; } /*! merges two boxes */ __forceinline static const BBox merge (const BBox& a, const BBox& b) { return BBox(min(a.lower, b.lower), max(a.upper, b.upper)); } /*! intersects two boxes */ __forceinline static const BBox intersect (const BBox& a, const BBox& b) { return BBox(max(a.lower, b.lower), min(a.upper, b.upper)); } /*! enlarge box by some scaling factor */ __forceinline BBox enlarge_by(const float a) const { return BBox(lower - T(a)*abs(lower), upper + T(a)*abs(upper)); } //////////////////////////////////////////////////////////////////////////////// /// Constants //////////////////////////////////////////////////////////////////////////////// __forceinline BBox( EmptyTy ) : lower(pos_inf), upper(neg_inf) {} __forceinline BBox( FullTy ) : lower(neg_inf), upper(pos_inf) {} __forceinline BBox( FalseTy ) : lower(pos_inf), upper(neg_inf) {} __forceinline BBox( TrueTy ) : lower(neg_inf), upper(pos_inf) {} __forceinline BBox( NegInfTy ): lower(pos_inf), upper(neg_inf) {} __forceinline BBox( PosInfTy ): lower(neg_inf), upper(pos_inf) {} }; template<> __forceinline bool BBox::empty() const { return lower > upper; } template<> __forceinline bool BBox::empty() const { return !all(le_mask(lower,upper)); } template<> __forceinline bool BBox::empty() const { return !all(le_mask(lower,upper)); } /*! tests if box is finite */ __forceinline bool isvalid( const BBox& v ) { return all(gt_mask(v.lower,Vec3fa_t(-FLT_LARGE)) & lt_mask(v.upper,Vec3fa_t(+FLT_LARGE))); } /*! tests if box is finite and non-empty*/ __forceinline bool isvalid_non_empty( const BBox& v ) { return all(gt_mask(v.lower,Vec3fa_t(-FLT_LARGE)) & lt_mask(v.upper,Vec3fa_t(+FLT_LARGE)) & le_mask(v.lower,v.upper)); } /*! 
tests if box has finite entries */ __forceinline bool is_finite( const BBox& b) { return is_finite(b.lower) && is_finite(b.upper); } /*! test if point contained in box */ __forceinline bool inside ( const BBox& b, const Vec3fa& p ) { return all(ge_mask(p,b.lower) & le_mask(p,b.upper)); } /*! computes the center of the box */ template __forceinline const T center2(const BBox& box) { return box.lower + box.upper; } template __forceinline const T center (const BBox& box) { return internal::divideByTwo(center2(box)); } /*! computes the volume of a bounding box */ __forceinline float volume ( const BBox& b ) { return reduce_mul(b.size()); } __forceinline float safeVolume( const BBox& b ) { if (b.empty()) return 0.0f; else return volume(b); } /*! computes the volume of a bounding box */ __forceinline float volume( const BBox& b ) { return reduce_mul(b.size()); } /*! computes the surface area of a bounding box */ template __forceinline const T area( const BBox >& b ) { const Vec2 d = b.size(); return d.x*d.y; } template __forceinline const T halfArea( const BBox >& b ) { return halfArea(b.size()); } template __forceinline const T area( const BBox >& b ) { return T(2)*halfArea(b); } __forceinline float halfArea( const BBox& b ) { return halfArea(b.size()); } __forceinline float area( const BBox& b ) { return 2.0f*halfArea(b); } __forceinline float halfArea( const BBox& b ) { return halfArea(b.size()); } __forceinline float area( const BBox& b ) { return 2.0f*halfArea(b); } template __forceinline float safeArea( const BBox& b ) { if (b.empty()) return 0.0f; else return area(b); } template __forceinline float expectedApproxHalfArea(const BBox& box) { return halfArea(box); } /*! 
merges bounding boxes and points */ template __forceinline const BBox merge( const BBox& a, const T& b ) { return BBox(min(a.lower, b ), max(a.upper, b )); } template __forceinline const BBox merge( const T& a, const BBox& b ) { return BBox(min(a , b.lower), max(a , b.upper)); } template __forceinline const BBox merge( const BBox& a, const BBox& b ) { return BBox(min(a.lower, b.lower), max(a.upper, b.upper)); } /*! Merges three boxes. */ template __forceinline const BBox merge( const BBox& a, const BBox& b, const BBox& c ) { return merge(a,merge(b,c)); } /*! Merges four boxes. */ template __forceinline BBox merge(const BBox& a, const BBox& b, const BBox& c, const BBox& d) { return merge(merge(a,b),merge(c,d)); } /*! Comparison Operators */ template __forceinline bool operator==( const BBox& a, const BBox& b ) { return a.lower == b.lower && a.upper == b.upper; } template __forceinline bool operator!=( const BBox& a, const BBox& b ) { return a.lower != b.lower || a.upper != b.upper; } /*! scaling */ template __forceinline BBox operator *( const float& a, const BBox& b ) { return BBox(a*b.lower,a*b.upper); } template __forceinline BBox operator *( const T& a, const BBox& b ) { return BBox(a*b.lower,a*b.upper); } /*! translations */ template __forceinline BBox operator +( const BBox& a, const BBox& b ) { return BBox(a.lower+b.lower,a.upper+b.upper); } template __forceinline BBox operator -( const BBox& a, const BBox& b ) { return BBox(a.lower-b.lower,a.upper-b.upper); } template __forceinline BBox operator +( const BBox& a, const T & b ) { return BBox(a.lower+b ,a.upper+b ); } template __forceinline BBox operator -( const BBox& a, const T & b ) { return BBox(a.lower-b ,a.upper-b ); } /*! extension */ template __forceinline BBox enlarge(const BBox& a, const T& b) { return BBox(a.lower-b, a.upper+b); } /*! 
intersect bounding boxes */ template __forceinline const BBox intersect( const BBox& a, const BBox& b ) { return BBox(max(a.lower, b.lower), min(a.upper, b.upper)); } template __forceinline const BBox intersect( const BBox& a, const BBox& b, const BBox& c ) { return intersect(a,intersect(b,c)); } template __forceinline const BBox intersect( const BBox& a, const BBox& b, const BBox& c, const BBox& d ) { return intersect(intersect(a,b),intersect(c,d)); } /*! subtract bounds from each other */ template __forceinline void subtract(const BBox& a, const BBox& b, BBox& c, BBox& d) { c.lower = a.lower; c.upper = min(a.upper,b.lower); d.lower = max(a.lower,b.upper); d.upper = a.upper; } /*! tests if bounding boxes (and points) are disjoint (empty intersection) */ template __inline bool disjoint( const BBox& a, const BBox& b ) { return intersect(a,b).empty(); } template __inline bool disjoint( const BBox& a, const T& b ) { return disjoint(a,BBox(b)); } template __inline bool disjoint( const T& a, const BBox& b ) { return disjoint(BBox(a),b); } /*! tests if bounding boxes (and points) are conjoint (non-empty intersection) */ template __inline bool conjoint( const BBox& a, const BBox& b ) { return !intersect(a,b).empty(); } template __inline bool conjoint( const BBox& a, const T& b ) { return conjoint(a,BBox(b)); } template __inline bool conjoint( const T& a, const BBox& b ) { return conjoint(BBox(a),b); } /*! subset relation */ template __inline bool subset( const BBox& a, const BBox& b ) { for ( size_t i = 0; i < T::N; i++ ) if ( a.lower[i] < b.lower[i] ) return false; for ( size_t i = 0; i < T::N; i++ ) if ( a.upper[i] > b.upper[i] ) return false; return true; } template<> __inline bool subset( const BBox& a, const BBox& b ) { return all(ge_mask(a.lower,b.lower)) && all(le_mask(a.upper,b.upper)); } template<> __inline bool subset( const BBox& a, const BBox& b ) { return all(ge_mask(a.lower,b.lower)) && all(le_mask(a.upper,b.upper)); } /*! 
blending */ template __forceinline BBox lerp(const BBox& b0, const BBox& b1, const float t) { return BBox(lerp(b0.lower,b1.lower,t),lerp(b0.upper,b1.upper,t)); } /*! output operator */ template __forceinline embree_ostream operator<<(embree_ostream cout, const BBox& box) { return cout << "[" << box.lower << "; " << box.upper << "]"; } /*! default template instantiations */ typedef BBox BBox1f; typedef BBox BBox2f; //typedef BBox BBox2fa; typedef BBox BBox3f; typedef BBox BBox3fa; typedef BBox BBox3fx; typedef BBox BBox3ff; } level-zero-raytracing-support-1.2.3/rtbuild/math/constants.h000066400000000000000000000205021514453371700242500ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "../sys/platform.h" #include #define _USE_MATH_DEFINES #include // using cmath causes issues under Windows #include #include namespace embree { static MAYBE_UNUSED const float one_over_255 = 1.0f/255.0f; static MAYBE_UNUSED const float min_rcp_input = 1E-18f; // for abs(x) >= min_rcp_input the newton raphson rcp calculation does not fail /* we consider floating point numbers in that range as valid input numbers */ static MAYBE_UNUSED float FLT_LARGE = 1.844E18f; struct TrueTy { __forceinline operator bool( ) const { return true; } }; const constexpr TrueTy True = TrueTy(); struct FalseTy { __forceinline operator bool( ) const { return false; } }; const constexpr FalseTy False = FalseTy(); struct ZeroTy { __forceinline operator double ( ) const { return 0; } __forceinline operator float ( ) const { return 0; } __forceinline operator long long( ) const { return 0; } __forceinline operator unsigned long long( ) const { return 0; } __forceinline operator long ( ) const { return 0; } __forceinline operator unsigned long ( ) const { return 0; } __forceinline operator int ( ) const { return 0; } __forceinline operator unsigned int ( ) const { return 0; } __forceinline operator short ( ) const { return 0; } 
__forceinline operator unsigned short ( ) const { return 0; } __forceinline operator char ( ) const { return 0; } __forceinline operator unsigned char ( ) const { return 0; } }; const constexpr ZeroTy zero = ZeroTy(); struct OneTy { __forceinline operator double ( ) const { return 1; } __forceinline operator float ( ) const { return 1; } __forceinline operator long long( ) const { return 1; } __forceinline operator unsigned long long( ) const { return 1; } __forceinline operator long ( ) const { return 1; } __forceinline operator unsigned long ( ) const { return 1; } __forceinline operator int ( ) const { return 1; } __forceinline operator unsigned int ( ) const { return 1; } __forceinline operator short ( ) const { return 1; } __forceinline operator unsigned short ( ) const { return 1; } __forceinline operator char ( ) const { return 1; } __forceinline operator unsigned char ( ) const { return 1; } }; const constexpr OneTy one = OneTy(); struct NegInfTy { __forceinline operator double ( ) const { return -std::numeric_limits::infinity(); } __forceinline operator float ( ) const { return -std::numeric_limits::infinity(); } __forceinline operator long long( ) const { return std::numeric_limits::min(); } __forceinline operator unsigned long long( ) const { return std::numeric_limits::min(); } __forceinline operator long ( ) const { return std::numeric_limits::min(); } __forceinline operator unsigned long ( ) const { return std::numeric_limits::min(); } __forceinline operator int ( ) const { return std::numeric_limits::min(); } __forceinline operator unsigned int ( ) const { return std::numeric_limits::min(); } __forceinline operator short ( ) const { return std::numeric_limits::min(); } __forceinline operator unsigned short ( ) const { return std::numeric_limits::min(); } __forceinline operator char ( ) const { return std::numeric_limits::min(); } __forceinline operator unsigned char ( ) const { return std::numeric_limits::min(); } }; const constexpr NegInfTy neg_inf 
= NegInfTy(); struct PosInfTy { __forceinline operator double ( ) const { return std::numeric_limits::infinity(); } __forceinline operator float ( ) const { return std::numeric_limits::infinity(); } __forceinline operator long long( ) const { return std::numeric_limits::max(); } __forceinline operator unsigned long long( ) const { return std::numeric_limits::max(); } __forceinline operator long ( ) const { return std::numeric_limits::max(); } __forceinline operator unsigned long ( ) const { return std::numeric_limits::max(); } __forceinline operator int ( ) const { return std::numeric_limits::max(); } __forceinline operator unsigned int ( ) const { return std::numeric_limits::max(); } __forceinline operator short ( ) const { return std::numeric_limits::max(); } __forceinline operator unsigned short ( ) const { return std::numeric_limits::max(); } __forceinline operator char ( ) const { return std::numeric_limits::max(); } __forceinline operator unsigned char ( ) const { return std::numeric_limits::max(); } }; const constexpr PosInfTy inf = PosInfTy(); const constexpr PosInfTy pos_inf = PosInfTy(); struct NaNTy { __forceinline operator double( ) const { return std::numeric_limits::quiet_NaN(); } __forceinline operator float ( ) const { return std::numeric_limits::quiet_NaN(); } }; const constexpr NaNTy nan = NaNTy(); struct UlpTy { __forceinline operator double( ) const { return std::numeric_limits::epsilon(); } __forceinline operator float ( ) const { return std::numeric_limits::epsilon(); } }; const constexpr UlpTy ulp = UlpTy(); struct PiTy { __forceinline operator double( ) const { return double(M_PI); } __forceinline operator float ( ) const { return float(M_PI); } }; const constexpr PiTy pi = PiTy(); struct OneOverPiTy { __forceinline operator double( ) const { return double(M_1_PI); } __forceinline operator float ( ) const { return float(M_1_PI); } }; const constexpr OneOverPiTy one_over_pi = OneOverPiTy(); struct TwoPiTy { __forceinline operator double( ) 
const { return double(2.0*M_PI); } __forceinline operator float ( ) const { return float(2.0*M_PI); } }; const constexpr TwoPiTy two_pi = TwoPiTy(); struct OneOverTwoPiTy { __forceinline operator double( ) const { return double(0.5*M_1_PI); } __forceinline operator float ( ) const { return float(0.5*M_1_PI); } }; const constexpr OneOverTwoPiTy one_over_two_pi = OneOverTwoPiTy(); struct FourPiTy { __forceinline operator double( ) const { return double(4.0*M_PI); } __forceinline operator float ( ) const { return float(4.0*M_PI); } }; const constexpr FourPiTy four_pi = FourPiTy(); struct OneOverFourPiTy { __forceinline operator double( ) const { return double(0.25*M_1_PI); } __forceinline operator float ( ) const { return float(0.25*M_1_PI); } }; const constexpr OneOverFourPiTy one_over_four_pi = OneOverFourPiTy(); struct StepTy { __forceinline operator double ( ) const { return 0; } __forceinline operator float ( ) const { return 0; } __forceinline operator long long( ) const { return 0; } __forceinline operator unsigned long long( ) const { return 0; } __forceinline operator long ( ) const { return 0; } __forceinline operator unsigned long ( ) const { return 0; } __forceinline operator int ( ) const { return 0; } __forceinline operator unsigned int ( ) const { return 0; } __forceinline operator short ( ) const { return 0; } __forceinline operator unsigned short ( ) const { return 0; } __forceinline operator char ( ) const { return 0; } __forceinline operator unsigned char ( ) const { return 0; } }; const constexpr StepTy step = StepTy(); struct ReverseStepTy { }; const constexpr ReverseStepTy reverse_step = ReverseStepTy(); struct EmptyTy { }; const constexpr EmptyTy empty = EmptyTy(); struct FullTy { }; const constexpr FullTy full = FullTy(); struct UndefinedTy { }; const constexpr UndefinedTy undefined = UndefinedTy(); } level-zero-raytracing-support-1.2.3/rtbuild/math/emath.h000066400000000000000000000323741514453371700233440ustar00rootroot00000000000000// 
Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "../sys/platform.h" #include "../sys/intrinsics.h" #include "constants.h" #include #if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__) # include "math_sycl.h" #else #if defined(__ARM_NEON) #include "../simd/arm/emulation.h" #else #include #include #include #endif #if defined(__WIN32__) #if defined(_MSC_VER) && (_MSC_VER <= 1700) namespace std { __forceinline bool isinf ( const float x ) { return _finite(x) == 0; } __forceinline bool isnan ( const float x ) { return _isnan(x) != 0; } __forceinline bool isfinite (const float x) { return _finite(x) != 0; } } #endif #endif namespace embree { __forceinline bool isvalid ( const float& v ) { return (v > -FLT_LARGE) & (v < +FLT_LARGE); } __forceinline int cast_f2i(float f) { union { float f; int i; } v; v.f = f; return v.i; } __forceinline float cast_i2f(int i) { union { float f; int i; } v; v.i = i; return v.f; } __forceinline int toInt (const float& a) { return int(a); } __forceinline float toFloat(const int& a) { return float(a); } #if defined(__WIN32__) __forceinline bool finite ( const float x ) { return _finite(x) != 0; } #endif __forceinline float sign ( const float x ) { return x<0?-1.0f:1.0f; } __forceinline float sqr ( const float x ) { return x*x; } __forceinline float rcp ( const float x ) { const __m128 a = _mm_set_ss(x); const __m128 r = _mm_rcp_ss(a); return _mm_cvtss_f32(_mm_mul_ss(r,_mm_sub_ss(_mm_set_ss(2.0f), _mm_mul_ss(r, a)))); } __forceinline float signmsk ( const float x ) { return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(0x80000000)))); } __forceinline float xorf( const float x, const float y ) { return _mm_cvtss_f32(_mm_xor_ps(_mm_set_ss(x),_mm_set_ss(y))); } __forceinline float andf( const float x, const unsigned y ) { return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(y)))); } __forceinline float rsqrt( const float x ) { const 
__m128 a = _mm_set_ss(x); __m128 r = _mm_rsqrt_ss(a); const __m128 c = _mm_add_ss(_mm_mul_ss(_mm_set_ss(1.5f), r), _mm_mul_ss(_mm_mul_ss(_mm_mul_ss(a, _mm_set_ss(-0.5f)), r), _mm_mul_ss(r, r))); return _mm_cvtss_f32(c); } #if defined(__WIN32__) && defined(_MSC_VER) && (_MSC_VER <= 1700) __forceinline float nextafter(float x, float y) { if ((x0)) return x*(1.1f+float(ulp)); else return x*(0.9f-float(ulp)); } __forceinline double nextafter(double x, double y) { return _nextafter(x, y); } __forceinline int roundf(float f) { return (int)(f + 0.5f); } #else __forceinline float nextafter(float x, float y) { return ::nextafterf(x, y); } __forceinline double nextafter(double x, double y) { return ::nextafter(x, y); } #endif __forceinline float abs ( const float x ) { return ::fabsf(x); } __forceinline float acos ( const float x ) { return ::acosf (x); } __forceinline float asin ( const float x ) { return ::asinf (x); } __forceinline float atan ( const float x ) { return ::atanf (x); } __forceinline float atan2( const float y, const float x ) { return ::atan2f(y, x); } __forceinline float cos ( const float x ) { return ::cosf (x); } __forceinline float cosh ( const float x ) { return ::coshf (x); } __forceinline float exp ( const float x ) { return ::expf (x); } __forceinline float fmod ( const float x, const float y ) { return ::fmodf (x, y); } __forceinline float log ( const float x ) { return ::logf (x); } __forceinline float log10( const float x ) { return ::log10f(x); } __forceinline float pow ( const float x, const float y ) { return ::powf (x, y); } __forceinline float sin ( const float x ) { return ::sinf (x); } __forceinline float sinh ( const float x ) { return ::sinhf (x); } __forceinline float sqrt ( const float x ) { return ::sqrtf (x); } __forceinline float tan ( const float x ) { return ::tanf (x); } __forceinline float tanh ( const float x ) { return ::tanhf (x); } __forceinline float floor( const float x ) { return ::floorf (x); } __forceinline float ceil ( 
const float x ) { return ::ceilf (x); } __forceinline float frac ( const float x ) { return x-floor(x); } __forceinline double abs ( const double x ) { return ::fabs(x); } __forceinline double sign ( const double x ) { return x<0?-1.0:1.0; } __forceinline double acos ( const double x ) { return ::acos (x); } __forceinline double asin ( const double x ) { return ::asin (x); } __forceinline double atan ( const double x ) { return ::atan (x); } __forceinline double atan2( const double y, const double x ) { return ::atan2(y, x); } __forceinline double cos ( const double x ) { return ::cos (x); } __forceinline double cosh ( const double x ) { return ::cosh (x); } __forceinline double exp ( const double x ) { return ::exp (x); } __forceinline double fmod ( const double x, const double y ) { return ::fmod (x, y); } __forceinline double log ( const double x ) { return ::log (x); } __forceinline double log10( const double x ) { return ::log10(x); } __forceinline double pow ( const double x, const double y ) { return ::pow (x, y); } __forceinline double rcp ( const double x ) { return 1.0/x; } __forceinline double rsqrt( const double x ) { return 1.0/::sqrt(x); } __forceinline double sin ( const double x ) { return ::sin (x); } __forceinline double sinh ( const double x ) { return ::sinh (x); } __forceinline double sqr ( const double x ) { return x*x; } __forceinline double sqrt ( const double x ) { return ::sqrt (x); } __forceinline double tan ( const double x ) { return ::tan (x); } __forceinline double tanh ( const double x ) { return ::tanh (x); } __forceinline double floor( const double x ) { return ::floor (x); } __forceinline double ceil ( const double x ) { return ::ceil (x); } template __forceinline T twice(const T& a) { return a+a; } __forceinline int min(int a, int b) { return a __forceinline T min(const T& a, const T& b, const T& c) { return min(min(a,b),c); } template __forceinline T min(const T& a, const T& b, const T& c, const T& d) { return 
min(min(a,b),min(c,d)); } template __forceinline T min(const T& a, const T& b, const T& c, const T& d, const T& e) { return min(min(min(a,b),min(c,d)),e); } template __forceinline T mini(const T& a, const T& b, const T& c) { return mini(mini(a,b),c); } template __forceinline T mini(const T& a, const T& b, const T& c, const T& d) { return mini(mini(a,b),mini(c,d)); } template __forceinline T mini(const T& a, const T& b, const T& c, const T& d, const T& e) { return mini(mini(mini(a,b),mini(c,d)),e); } __forceinline int max(int a, int b) { return a __forceinline T max(const T& a, const T& b, const T& c) { return max(max(a,b),c); } template __forceinline T max(const T& a, const T& b, const T& c, const T& d) { return max(max(a,b),max(c,d)); } template __forceinline T max(const T& a, const T& b, const T& c, const T& d, const T& e) { return max(max(max(a,b),max(c,d)),e); } template __forceinline T maxi(const T& a, const T& b, const T& c) { return maxi(maxi(a,b),c); } template __forceinline T maxi(const T& a, const T& b, const T& c, const T& d) { return maxi(maxi(a,b),maxi(c,d)); } template __forceinline T maxi(const T& a, const T& b, const T& c, const T& d, const T& e) { return maxi(maxi(maxi(a,b),maxi(c,d)),e); } #if defined(__MACOSX__) __forceinline ssize_t min(ssize_t a, ssize_t b) { return a __forceinline T clamp(const T& x, const T& lower = T(zero), const T& upper = T(one)) { return max(min(x,upper),lower); } template __forceinline T clampz(const T& x, const T& upper) { return max(T(zero), min(x,upper)); } template __forceinline T deg2rad ( const T& x ) { return x * T(1.74532925199432957692e-2f); } template __forceinline T rad2deg ( const T& x ) { return x * T(5.72957795130823208768e1f); } template __forceinline T sin2cos ( const T& x ) { return sqrt(max(T(zero),T(one)-x*x)); } template __forceinline T cos2sin ( const T& x ) { return sin2cos(x); } __forceinline float madd ( const float a, const float b, const float c) { return a*b+c; } __forceinline float msub ( 
const float a, const float b, const float c) { return a*b-c; } __forceinline float nmadd ( const float a, const float b, const float c) { return -a*b+c;} __forceinline float nmsub ( const float a, const float b, const float c) { return -a*b-c; } /*! random functions */ template T random() { return T(0); } #if defined(_WIN32) template<> __forceinline int random() { return int(rand()) ^ (int(rand()) << 8) ^ (int(rand()) << 16); } template<> __forceinline uint32_t random() { return uint32_t(rand()) ^ (uint32_t(rand()) << 8) ^ (uint32_t(rand()) << 16); } #else template<> __forceinline int random() { return int(rand()); } template<> __forceinline uint32_t random() { return uint32_t(rand()) ^ (uint32_t(rand()) << 16); } #endif template<> __forceinline float random() { return rand()/float(RAND_MAX); } template<> __forceinline double random() { return rand()/double(RAND_MAX); } #if _WIN32 __forceinline double drand48() { return double(rand())/double(RAND_MAX); } __forceinline void srand48(long seed) { return srand(seed); } #endif /*! selects */ __forceinline bool select(bool s, bool t , bool f) { return s ? t : f; } __forceinline int select(bool s, int t, int f) { return s ? t : f; } __forceinline float select(bool s, float t, float f) { return s ? t : f; } __forceinline bool none(bool s) { return !s; } __forceinline bool all (bool s) { return s; } __forceinline bool any (bool s) { return s; } __forceinline unsigned movemask (bool s) { return (unsigned)s; } __forceinline float lerp(const float v0, const float v1, const float t) { return madd(1.0f-t,v0,t*v1); } template __forceinline T lerp2(const float x0, const float x1, const float x2, const float x3, const T& u, const T& v) { return madd((1.0f-u),madd((1.0f-v),T(x0),v*T(x2)),u*madd((1.0f-v),T(x1),v*T(x3))); } /*! 
exchange */ template __forceinline void xchg ( T& a, T& b ) { const T tmp = a; a = b; b = tmp; } /* load/store */ template struct mem; template<> struct mem { static __forceinline float load (bool mask, const void* ptr) { return mask ? *(float*)ptr : 0.0f; } static __forceinline float loadu(bool mask, const void* ptr) { return mask ? *(float*)ptr : 0.0f; } static __forceinline void store (bool mask, void* ptr, const float v) { if (mask) *(float*)ptr = v; } static __forceinline void storeu(bool mask, void* ptr, const float v) { if (mask) *(float*)ptr = v; } }; /*! bit reverse operation */ template __forceinline T bitReverse(const T& vin) { T v = vin; v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1); v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2); v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4); v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8); v = ( v >> 16 ) | ( v << 16); return v; } /*! bit interleave operation */ template __forceinline T bitInterleave(const T& xin, const T& yin, const T& zin) { T x = xin, y = yin, z = zin; x = (x | (x << 16)) & 0x030000FF; x = (x | (x << 8)) & 0x0300F00F; x = (x | (x << 4)) & 0x030C30C3; x = (x | (x << 2)) & 0x09249249; y = (y | (y << 16)) & 0x030000FF; y = (y | (y << 8)) & 0x0300F00F; y = (y | (y << 4)) & 0x030C30C3; y = (y | (y << 2)) & 0x09249249; z = (z | (z << 16)) & 0x030000FF; z = (z | (z << 8)) & 0x0300F00F; z = (z | (z << 4)) & 0x030C30C3; z = (z | (z << 2)) & 0x09249249; return x | (y << 1) | (z << 2); } } #endif level-zero-raytracing-support-1.2.3/rtbuild/math/linearspace3.h000066400000000000000000000250311514453371700246070ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "vec3.h" //#include "quaternion.h" namespace embree { //////////////////////////////////////////////////////////////////////////////// /// 3D Linear Transform (3x3 Matrix) 
//////////////////////////////////////////////////////////////////////////////// template struct LinearSpace3 { typedef T Vector; typedef typename T::Scalar Scalar; /*! default matrix constructor */ __forceinline LinearSpace3 ( ) {} __forceinline LinearSpace3 ( const LinearSpace3& other ) { vx = other.vx; vy = other.vy; vz = other.vz; } __forceinline LinearSpace3& operator=( const LinearSpace3& other ) { vx = other.vx; vy = other.vy; vz = other.vz; return *this; } template __forceinline LinearSpace3( const LinearSpace3& s ) : vx(s.vx), vy(s.vy), vz(s.vz) {} /*! matrix construction from column vectors */ __forceinline LinearSpace3(const Vector& vx, const Vector& vy, const Vector& vz) : vx(vx), vy(vy), vz(vz) {} /*! construction from quaternion */ /*__forceinline LinearSpace3( const QuaternionT& q ) : vx((q.r*q.r + q.i*q.i - q.j*q.j - q.k*q.k), 2.0f*(q.i*q.j + q.r*q.k), 2.0f*(q.i*q.k - q.r*q.j)) , vy(2.0f*(q.i*q.j - q.r*q.k), (q.r*q.r - q.i*q.i + q.j*q.j - q.k*q.k), 2.0f*(q.j*q.k + q.r*q.i)) , vz(2.0f*(q.i*q.k + q.r*q.j), 2.0f*(q.j*q.k - q.r*q.i), (q.r*q.r - q.i*q.i - q.j*q.j + q.k*q.k)) {}*/ /*! matrix construction from row mayor data */ __forceinline LinearSpace3(const Scalar& m00, const Scalar& m01, const Scalar& m02, const Scalar& m10, const Scalar& m11, const Scalar& m12, const Scalar& m20, const Scalar& m21, const Scalar& m22) : vx(m00,m10,m20), vy(m01,m11,m21), vz(m02,m12,m22) {} /*! compute the determinant of the matrix */ __forceinline const Scalar det() const { return dot(vx,cross(vy,vz)); } /*! compute adjoint matrix */ __forceinline const LinearSpace3 adjoint() const { return LinearSpace3(cross(vy,vz),cross(vz,vx),cross(vx,vy)).transposed(); } /*! compute inverse matrix */ __forceinline const LinearSpace3 inverse() const { return adjoint()/det(); } /*! compute transposed matrix */ __forceinline const LinearSpace3 transposed() const { return LinearSpace3(vx.x,vx.y,vx.z,vy.x,vy.y,vy.z,vz.x,vz.y,vz.z); } /*! 
returns first row of matrix */ __forceinline Vector row0() const { return Vector(vx.x,vy.x,vz.x); } /*! returns second row of matrix */ __forceinline Vector row1() const { return Vector(vx.y,vy.y,vz.y); } /*! returns third row of matrix */ __forceinline Vector row2() const { return Vector(vx.z,vy.z,vz.z); } //////////////////////////////////////////////////////////////////////////////// /// Constants //////////////////////////////////////////////////////////////////////////////// __forceinline LinearSpace3( ZeroTy ) : vx(zero), vy(zero), vz(zero) {} __forceinline LinearSpace3( OneTy ) : vx(one, zero, zero), vy(zero, one, zero), vz(zero, zero, one) {} /*! return matrix for scaling */ static __forceinline LinearSpace3 scale(const Vector& s) { return LinearSpace3(s.x, 0, 0, 0 , s.y, 0, 0 , 0, s.z); } /*! return matrix for rotation around arbitrary axis */ static __forceinline LinearSpace3 rotate(const Vector& _u, const Scalar& r) { Vector u = normalize(_u); Scalar s = sin(r), c = cos(r); return LinearSpace3(u.x*u.x+(1-u.x*u.x)*c, u.x*u.y*(1-c)-u.z*s, u.x*u.z*(1-c)+u.y*s, u.x*u.y*(1-c)+u.z*s, u.y*u.y+(1-u.y*u.y)*c, u.y*u.z*(1-c)-u.x*s, u.x*u.z*(1-c)-u.y*s, u.y*u.z*(1-c)+u.x*s, u.z*u.z+(1-u.z*u.z)*c); } public: /*! the column vectors of the matrix */ Vector vx,vy,vz; }; #if !defined(__SYCL_DEVICE_ONLY__) /*! 
compute transposed matrix */ template<> __forceinline const LinearSpace3 LinearSpace3::transposed() const { vfloat4 rx,ry,rz; transpose((vfloat4&)vx,(vfloat4&)vy,(vfloat4&)vz,vfloat4(zero),rx,ry,rz); return LinearSpace3(Vec3fa(rx),Vec3fa(ry),Vec3fa(rz)); } #endif template __forceinline const LinearSpace3 transposed(const LinearSpace3& xfm) { return xfm.transposed(); } //////////////////////////////////////////////////////////////////////////////// // Unary Operators //////////////////////////////////////////////////////////////////////////////// template __forceinline LinearSpace3 operator -( const LinearSpace3& a ) { return LinearSpace3(-a.vx,-a.vy,-a.vz); } template __forceinline LinearSpace3 operator +( const LinearSpace3& a ) { return LinearSpace3(+a.vx,+a.vy,+a.vz); } template __forceinline LinearSpace3 rcp ( const LinearSpace3& a ) { return a.inverse(); } /* constructs a coordinate frame form a normalized normal */ template __forceinline LinearSpace3 frame(const T& N) { const T dx0(0,N.z,-N.y); const T dx1(-N.z,0,N.x); const T dx = normalize(select(dot(dx0,dx0) > dot(dx1,dx1),dx0,dx1)); const T dy = normalize(cross(N,dx)); return LinearSpace3(dx,dy,N); } /* constructs a coordinate frame from a normal and approximate x-direction */ template __forceinline LinearSpace3 frame(const T& N, const T& dxi) { if (abs(dot(dxi,N)) > 0.99f) return frame(N); // fallback in case N and dxi are very parallel const T dx = normalize(cross(dxi,N)); const T dy = normalize(cross(N,dx)); return LinearSpace3(dx,dy,N); } /* clamps linear space to range -1 to +1 */ template __forceinline LinearSpace3 clamp(const LinearSpace3& space) { return LinearSpace3(clamp(space.vx,T(-1.0f),T(1.0f)), clamp(space.vy,T(-1.0f),T(1.0f)), clamp(space.vz,T(-1.0f),T(1.0f))); } //////////////////////////////////////////////////////////////////////////////// // Binary Operators //////////////////////////////////////////////////////////////////////////////// template __forceinline LinearSpace3 operator +( 
const LinearSpace3& a, const LinearSpace3& b ) { return LinearSpace3(a.vx+b.vx,a.vy+b.vy,a.vz+b.vz); } template __forceinline LinearSpace3 operator -( const LinearSpace3& a, const LinearSpace3& b ) { return LinearSpace3(a.vx-b.vx,a.vy-b.vy,a.vz-b.vz); } template __forceinline LinearSpace3 operator*(const typename T::Scalar & a, const LinearSpace3& b) { return LinearSpace3(a*b.vx, a*b.vy, a*b.vz); } template __forceinline T operator*(const LinearSpace3& a, const T & b) { return madd(T(b.x),a.vx,madd(T(b.y),a.vy,T(b.z)*a.vz)); } template __forceinline LinearSpace3 operator*(const LinearSpace3& a, const LinearSpace3& b) { return LinearSpace3(a*b.vx, a*b.vy, a*b.vz); } template __forceinline LinearSpace3 operator/(const LinearSpace3& a, const typename T::Scalar & b) { return LinearSpace3(a.vx/b, a.vy/b, a.vz/b); } template __forceinline LinearSpace3 operator/(const LinearSpace3& a, const LinearSpace3& b) { return a * rcp(b); } template __forceinline LinearSpace3& operator *=( LinearSpace3& a, const LinearSpace3& b ) { return a = a * b; } template __forceinline LinearSpace3& operator /=( LinearSpace3& a, const LinearSpace3& b ) { return a = a / b; } template __forceinline T xfmPoint (const LinearSpace3& s, const T & a) { return madd(T(a.x),s.vx,madd(T(a.y),s.vy,T(a.z)*s.vz)); } template __forceinline T xfmVector(const LinearSpace3& s, const T & a) { return madd(T(a.x),s.vx,madd(T(a.y),s.vy,T(a.z)*s.vz)); } template __forceinline T xfmNormal(const LinearSpace3& s, const T & a) { return xfmVector(s.inverse().transposed(),a); } //////////////////////////////////////////////////////////////////////////////// /// Comparison Operators //////////////////////////////////////////////////////////////////////////////// template __forceinline bool operator ==( const LinearSpace3& a, const LinearSpace3& b ) { return a.vx == b.vx && a.vy == b.vy && a.vz == b.vz; } template __forceinline bool operator !=( const LinearSpace3& a, const LinearSpace3& b ) { return a.vx != b.vx || a.vy != 
b.vy || a.vz != b.vz; } //////////////////////////////////////////////////////////////////////////////// /// Select //////////////////////////////////////////////////////////////////////////////// template __forceinline LinearSpace3 select ( const typename T::Scalar::Bool& s, const LinearSpace3& t, const LinearSpace3& f ) { return LinearSpace3(select(s,t.vx,f.vx),select(s,t.vy,f.vy),select(s,t.vz,f.vz)); } /*! blending */ template __forceinline LinearSpace3 lerp(const LinearSpace3& l0, const LinearSpace3& l1, const float t) { return LinearSpace3(lerp(l0.vx,l1.vx,t), lerp(l0.vy,l1.vy,t), lerp(l0.vz,l1.vz,t)); } //////////////////////////////////////////////////////////////////////////////// /// Output Operators //////////////////////////////////////////////////////////////////////////////// template static embree_ostream operator<<(embree_ostream cout, const LinearSpace3& m) { return cout << "{ vx = " << m.vx << ", vy = " << m.vy << ", vz = " << m.vz << "}"; } /*! Shortcuts for common linear spaces. */ typedef LinearSpace3 LinearSpace3f; typedef LinearSpace3 LinearSpace3fa; typedef LinearSpace3 LinearSpace3fx; typedef LinearSpace3 LinearSpace3ff; template using LinearSpace3vf = LinearSpace3>>; typedef LinearSpace3>> LinearSpace3vf4; typedef LinearSpace3>> LinearSpace3vf8; typedef LinearSpace3>> LinearSpace3vf16; /*! 
blending */ template __forceinline LinearSpace3 lerp(const LinearSpace3& l0, const LinearSpace3& l1, const S& t) { return LinearSpace3(lerp(l0.vx,l1.vx,t), lerp(l0.vy,l1.vy,t), lerp(l0.vz,l1.vz,t)); } } level-zero-raytracing-support-1.2.3/rtbuild/math/range.h000066400000000000000000000071161514453371700233360ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "../sys/platform.h" #include "../math/emath.h" namespace embree { template struct range { __forceinline range() {} __forceinline range(const Ty& begin) : _begin(begin), _end(begin+1) {} __forceinline range(const Ty& begin, const Ty& end) : _begin(begin), _end(end) {} __forceinline range(const range& other) : _begin(other._begin), _end(other._end) {} template __forceinline range(const range& other) : _begin(Ty(other._begin)), _end(Ty(other._end)) {} template __forceinline range& operator =(const range& other) { _begin = other._begin; _end = other._end; return *this; } __forceinline Ty begin() const { return _begin; } __forceinline Ty end() const { return _end; } __forceinline range intersect(const range& r) const { return range (max(_begin,r._begin),min(_end,r._end)); } __forceinline Ty size() const { return _end - _begin; } __forceinline bool empty() const { return _end <= _begin; } __forceinline Ty center() const { return (_begin + _end)/2; } __forceinline std::pair split() const { const Ty _center = center(); return std::make_pair(range(_begin,_center),range(_center,_end)); } __forceinline void split(range& left_o, range& right_o) const { const Ty _center = center(); left_o = range(_begin,_center); right_o = range(_center,_end); } __forceinline friend bool operator< (const range& r0, const range& r1) { return r0.size() < r1.size(); } friend embree_ostream operator<<(embree_ostream cout, const range& r) { return cout << "range [" << r.begin() << ", " << r.end() << "]"; } Ty _begin, _end; }; template range make_range(const Ty& 
begin, const Ty& end) { return range(begin,end); } template struct extended_range : public range { __forceinline extended_range () {} __forceinline extended_range (const Ty& begin) : range(begin), _ext_end(begin+1) {} __forceinline extended_range (const Ty& begin, const Ty& end) : range(begin,end), _ext_end(end) {} __forceinline extended_range (const Ty& begin, const Ty& end, const Ty& ext_end) : range(begin,end), _ext_end(ext_end) {} __forceinline Ty ext_end() const { return _ext_end; } __forceinline Ty ext_size() const { return _ext_end - range::_begin; } __forceinline Ty ext_range_size() const { return _ext_end - range::_end; } __forceinline bool has_ext_range() const { assert(_ext_end >= range::_end); return (_ext_end - range::_end) > 0; } __forceinline void set_ext_range(const size_t ext_end){ assert(ext_end >= range::_end); _ext_end = ext_end; } __forceinline void move_right(const size_t plus){ range::_begin += plus; range::_end += plus; _ext_end += plus; } friend embree_ostream operator<<(embree_ostream cout, const extended_range& r) { return cout << "extended_range [" << r.begin() << ", " << r.end() << " (" << r.ext_end() << ")]"; } Ty _ext_end; }; } level-zero-raytracing-support-1.2.3/rtbuild/math/vec2.h000066400000000000000000000264541514453371700231070ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "emath.h" namespace embree { //struct Vec2fa; //////////////////////////////////////////////////////////////////////////////// /// Generic 2D vector Class //////////////////////////////////////////////////////////////////////////////// template struct Vec2 { enum { N = 2 }; union { struct { T x, y; }; T components[N]; }; typedef T Scalar; //////////////////////////////////////////////////////////////////////////////// /// Construction //////////////////////////////////////////////////////////////////////////////// __forceinline Vec2( ) {} __forceinline explicit Vec2( const T& a 
) : x(a), y(a) {} __forceinline Vec2( const T& x, const T& y ) : x(x), y(y) {} __forceinline Vec2( const Vec2& other ) { x = other.x; y = other.y; } //Vec2( const Vec2fa& other ); template __forceinline Vec2( const Vec2& a ) : x(T(a.x)), y(T(a.y)) {} template __forceinline Vec2& operator =( const Vec2& other ) { x = other.x; y = other.y; return *this; } __forceinline Vec2& operator =( const Vec2& other ) { x = other.x; y = other.y; return *this; } //////////////////////////////////////////////////////////////////////////////// /// Constants //////////////////////////////////////////////////////////////////////////////// __forceinline Vec2( ZeroTy ) : x(zero), y(zero) {} __forceinline Vec2( OneTy ) : x(one), y(one) {} __forceinline Vec2( PosInfTy ) : x(pos_inf), y(pos_inf) {} __forceinline Vec2( NegInfTy ) : x(neg_inf), y(neg_inf) {} __forceinline const T& operator [](const size_t axis) const { assert(axis < 2); return components[axis]; } __forceinline T& operator [](const size_t axis ) { assert(axis < 2); return components[axis]; } }; //////////////////////////////////////////////////////////////////////////////// /// Unary Operators //////////////////////////////////////////////////////////////////////////////// template __forceinline Vec2 operator +( const Vec2& a ) { return Vec2(+a.x, +a.y); } template __forceinline Vec2 operator -( const Vec2& a ) { return Vec2(-a.x, -a.y); } template __forceinline Vec2 abs ( const Vec2& a ) { return Vec2(abs (a.x), abs (a.y)); } template __forceinline Vec2 rcp ( const Vec2& a ) { return Vec2(rcp (a.x), rcp (a.y)); } template __forceinline Vec2 rsqrt ( const Vec2& a ) { return Vec2(rsqrt(a.x), rsqrt(a.y)); } template __forceinline Vec2 sqrt ( const Vec2& a ) { return Vec2(sqrt (a.x), sqrt (a.y)); } template __forceinline Vec2 frac ( const Vec2& a ) { return Vec2(frac (a.x), frac (a.y)); } //////////////////////////////////////////////////////////////////////////////// /// Binary Operators 
//////////////////////////////////////////////////////////////////////////////// template __forceinline Vec2 operator +( const Vec2& a, const Vec2& b ) { return Vec2(a.x + b.x, a.y + b.y); } template __forceinline Vec2 operator +( const Vec2& a, const T& b ) { return Vec2(a.x + b , a.y + b ); } template __forceinline Vec2 operator +( const T& a, const Vec2& b ) { return Vec2(a + b.x, a + b.y); } template __forceinline Vec2 operator -( const Vec2& a, const Vec2& b ) { return Vec2(a.x - b.x, a.y - b.y); } template __forceinline Vec2 operator -( const Vec2& a, const T& b ) { return Vec2(a.x - b , a.y - b ); } template __forceinline Vec2 operator -( const T& a, const Vec2& b ) { return Vec2(a - b.x, a - b.y); } template __forceinline Vec2 operator *( const Vec2& a, const Vec2& b ) { return Vec2(a.x * b.x, a.y * b.y); } template __forceinline Vec2 operator *( const T& a, const Vec2& b ) { return Vec2(a * b.x, a * b.y); } template __forceinline Vec2 operator *( const Vec2& a, const T& b ) { return Vec2(a.x * b , a.y * b ); } template __forceinline Vec2 operator /( const Vec2& a, const Vec2& b ) { return Vec2(a.x / b.x, a.y / b.y); } template __forceinline Vec2 operator /( const Vec2& a, const T& b ) { return Vec2(a.x / b , a.y / b ); } template __forceinline Vec2 operator /( const T& a, const Vec2& b ) { return Vec2(a / b.x, a / b.y); } template __forceinline Vec2 min(const Vec2& a, const Vec2& b) { return Vec2(min(a.x, b.x), min(a.y, b.y)); } template __forceinline Vec2 max(const Vec2& a, const Vec2& b) { return Vec2(max(a.x, b.x), max(a.y, b.y)); } //////////////////////////////////////////////////////////////////////////////// /// Ternary Operators //////////////////////////////////////////////////////////////////////////////// template __forceinline Vec2 madd ( const Vec2& a, const Vec2& b, const Vec2& c) { return Vec2( madd(a.x,b.x,c.x), madd(a.y,b.y,c.y) ); } template __forceinline Vec2 msub ( const Vec2& a, const Vec2& b, const Vec2& c) { return Vec2( 
msub(a.x,b.x,c.x), msub(a.y,b.y,c.y) ); } template __forceinline Vec2 nmadd ( const Vec2& a, const Vec2& b, const Vec2& c) { return Vec2(nmadd(a.x,b.x,c.x),nmadd(a.y,b.y,c.y) ); } template __forceinline Vec2 nmsub ( const Vec2& a, const Vec2& b, const Vec2& c) { return Vec2(nmsub(a.x,b.x,c.x),nmsub(a.y,b.y,c.y) ); } template __forceinline Vec2 madd ( const T& a, const Vec2& b, const Vec2& c) { return Vec2( madd(a,b.x,c.x), madd(a,b.y,c.y) ); } template __forceinline Vec2 msub ( const T& a, const Vec2& b, const Vec2& c) { return Vec2( msub(a,b.x,c.x), msub(a,b.y,c.y) ); } template __forceinline Vec2 nmadd ( const T& a, const Vec2& b, const Vec2& c) { return Vec2(nmadd(a,b.x,c.x),nmadd(a,b.y,c.y) ); } template __forceinline Vec2 nmsub ( const T& a, const Vec2& b, const Vec2& c) { return Vec2(nmsub(a,b.x,c.x),nmsub(a,b.y,c.y) ); } //////////////////////////////////////////////////////////////////////////////// /// Assignment Operators //////////////////////////////////////////////////////////////////////////////// template __forceinline Vec2& operator +=( Vec2& a, const Vec2& b ) { a.x += b.x; a.y += b.y; return a; } template __forceinline Vec2& operator -=( Vec2& a, const Vec2& b ) { a.x -= b.x; a.y -= b.y; return a; } template __forceinline Vec2& operator *=( Vec2& a, const T& b ) { a.x *= b ; a.y *= b ; return a; } template __forceinline Vec2& operator /=( Vec2& a, const T& b ) { a.x /= b ; a.y /= b ; return a; } //////////////////////////////////////////////////////////////////////////////// /// Reduction Operators //////////////////////////////////////////////////////////////////////////////// template __forceinline T reduce_add( const Vec2& a ) { return a.x + a.y; } template __forceinline T reduce_mul( const Vec2& a ) { return a.x * a.y; } template __forceinline T reduce_min( const Vec2& a ) { return min(a.x, a.y); } template __forceinline T reduce_max( const Vec2& a ) { return max(a.x, a.y); } 
//////////////////////////////////////////////////////////////////////////////// /// Comparison Operators //////////////////////////////////////////////////////////////////////////////// template __forceinline bool operator ==( const Vec2& a, const Vec2& b ) { return a.x == b.x && a.y == b.y; } template __forceinline bool operator !=( const Vec2& a, const Vec2& b ) { return a.x != b.x || a.y != b.y; } template __forceinline bool operator < ( const Vec2& a, const Vec2& b ) { if (a.x != b.x) return a.x < b.x; if (a.y != b.y) return a.y < b.y; return false; } //////////////////////////////////////////////////////////////////////////////// /// Shift Operators //////////////////////////////////////////////////////////////////////////////// template __forceinline Vec2 shift_right_1( const Vec2& a ) { return Vec2(shift_right_1(a.x),shift_right_1(a.y)); } //////////////////////////////////////////////////////////////////////////////// /// Euclidean Space Operators //////////////////////////////////////////////////////////////////////////////// template __forceinline T dot ( const Vec2& a, const Vec2& b ) { return madd(a.x,b.x,a.y*b.y); } template __forceinline Vec2 cross ( const Vec2& a ) { return Vec2(-a.y,a.x); } template __forceinline T length ( const Vec2& a ) { return sqrt(dot(a,a)); } template __forceinline Vec2 normalize( const Vec2& a ) { return a*rsqrt(dot(a,a)); } template __forceinline T distance ( const Vec2& a, const Vec2& b ) { return length(a-b); } template __forceinline T det ( const Vec2& a, const Vec2& b ) { return a.x*b.y - a.y*b.x; } template __forceinline Vec2 normalize_safe( const Vec2& a ) { const T d = dot(a,a); return select(d == T( zero ),a, a*rsqrt(d) ); } //////////////////////////////////////////////////////////////////////////////// /// Select //////////////////////////////////////////////////////////////////////////////// template __forceinline Vec2 select ( bool s, const Vec2& t, const Vec2& f ) { return 
Vec2(select(s,t.x,f.x),select(s,t.y,f.y)); } template __forceinline Vec2 select ( const Vec2& s, const Vec2& t, const Vec2& f ) { return Vec2(select(s.x,t.x,f.x),select(s.y,t.y,f.y)); } template __forceinline Vec2 select ( const typename T::Bool& s, const Vec2& t, const Vec2& f ) { return Vec2(select(s,t.x,f.x),select(s,t.y,f.y)); } template __forceinline Vec2 lerp(const Vec2& v0, const Vec2& v1, const T& t) { return madd(Vec2(T(1.0f)-t),v0,t*v1); } template __forceinline int maxDim ( const Vec2& a ) { const Vec2 b = abs(a); if (b.x > b.y) return 0; else return 1; } //////////////////////////////////////////////////////////////////////////////// /// Output Operators //////////////////////////////////////////////////////////////////////////////// template __forceinline embree_ostream operator<<(embree_ostream cout, const Vec2& a) { return cout << "(" << a.x << ", " << a.y << ")"; } //////////////////////////////////////////////////////////////////////////////// /// Default template instantiations //////////////////////////////////////////////////////////////////////////////// typedef Vec2 Vec2b; typedef Vec2 Vec2i; typedef Vec2 Vec2f; } level-zero-raytracing-support-1.2.3/rtbuild/math/vec3.h000066400000000000000000000356561514453371700231140ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "emath.h" namespace embree { struct Vec3fa; //////////////////////////////////////////////////////////////////////////////// /// Generic 3D vector Class //////////////////////////////////////////////////////////////////////////////// template struct Vec3 { enum { N = 3 }; union { struct { T x, y, z; }; T components[N]; }; typedef T Scalar; //////////////////////////////////////////////////////////////////////////////// /// Construction //////////////////////////////////////////////////////////////////////////////// __forceinline Vec3( ) {} __forceinline explicit Vec3( const T& a ) : x(a), y(a), z(a) 
{} __forceinline Vec3( const T& x, const T& y, const T& z ) : x(x), y(y), z(z) {} __forceinline Vec3( const Vec3& other ) { x = other.x; y = other.y; z = other.z; } __forceinline Vec3( const Vec3fa& other ); template __forceinline Vec3( const Vec3& a ) : x(T(a.x)), y(T(a.y)), z(T(a.z)) {} template __forceinline Vec3& operator =(const Vec3& other) { x = other.x; y = other.y; z = other.z; return *this; } __forceinline Vec3& operator =(const Vec3& other) { x = other.x; y = other.y; z = other.z; return *this; } //////////////////////////////////////////////////////////////////////////////// /// Constants //////////////////////////////////////////////////////////////////////////////// __forceinline Vec3( ZeroTy ) : x(zero), y(zero), z(zero) {} __forceinline Vec3( OneTy ) : x(one), y(one), z(one) {} __forceinline Vec3( PosInfTy ) : x(pos_inf), y(pos_inf), z(pos_inf) {} __forceinline Vec3( NegInfTy ) : x(neg_inf), y(neg_inf), z(neg_inf) {} __forceinline const T& operator [](const size_t axis) const { assert(axis < 3); return components[axis]; } __forceinline T& operator [](const size_t axis) { assert(axis < 3); return components[axis]; } }; //////////////////////////////////////////////////////////////////////////////// /// Unary Operators //////////////////////////////////////////////////////////////////////////////// template __forceinline Vec3 operator +( const Vec3& a ) { return Vec3(+a.x, +a.y, +a.z); } template __forceinline Vec3 operator -( const Vec3& a ) { return Vec3(-a.x, -a.y, -a.z); } template __forceinline Vec3 abs ( const Vec3& a ) { return Vec3(abs (a.x), abs (a.y), abs (a.z)); } template __forceinline Vec3 rcp ( const Vec3& a ) { return Vec3(rcp (a.x), rcp (a.y), rcp (a.z)); } template __forceinline Vec3 rsqrt ( const Vec3& a ) { return Vec3(rsqrt(a.x), rsqrt(a.y), rsqrt(a.z)); } template __forceinline Vec3 sqrt ( const Vec3& a ) { return Vec3(sqrt (a.x), sqrt (a.y), sqrt (a.z)); } template __forceinline Vec3 zero_fix( const Vec3& a ) { return 
Vec3(select(abs(a.x) __forceinline Vec3 rcp_safe(const Vec3& a) { return rcp(zero_fix(a)); } //////////////////////////////////////////////////////////////////////////////// /// Binary Operators //////////////////////////////////////////////////////////////////////////////// template __forceinline Vec3 operator +( const Vec3& a, const Vec3& b ) { return Vec3(a.x + b.x, a.y + b.y, a.z + b.z); } template __forceinline Vec3 operator -( const Vec3& a, const Vec3& b ) { return Vec3(a.x - b.x, a.y - b.y, a.z - b.z); } template __forceinline Vec3 operator *( const Vec3& a, const Vec3& b ) { return Vec3(a.x * b.x, a.y * b.y, a.z * b.z); } template __forceinline Vec3 operator *( const T& a, const Vec3& b ) { return Vec3(a * b.x, a * b.y, a * b.z); } template __forceinline Vec3 operator *( const Vec3& a, const T& b ) { return Vec3(a.x * b , a.y * b , a.z * b ); } template __forceinline Vec3 operator /( const Vec3& a, const T& b ) { return Vec3(a.x / b , a.y / b , a.z / b ); } template __forceinline Vec3 operator /( const T& a, const Vec3& b ) { return Vec3(a / b.x, a / b.y, a / b.z); } template __forceinline Vec3 operator /( const Vec3& a, const Vec3& b ) { return Vec3(a.x / b.x, a.y / b.y, a.z / b.z); } template __forceinline Vec3 min(const Vec3& a, const Vec3& b) { return Vec3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); } template __forceinline Vec3 max(const Vec3& a, const Vec3& b) { return Vec3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); } template __forceinline Vec3 operator >>( const Vec3& a, const int b ) { return Vec3(a.x >> b, a.y >> b, a.z >> b); } template __forceinline Vec3 operator <<( const Vec3& a, const int b ) { return Vec3(a.x << b, a.y << b, a.z << b); } //////////////////////////////////////////////////////////////////////////////// /// Ternary Operators //////////////////////////////////////////////////////////////////////////////// template __forceinline Vec3 madd ( const Vec3& a, const Vec3& b, const Vec3& c) { return Vec3( madd(a.x,b.x,c.x), 
madd(a.y,b.y,c.y), madd(a.z,b.z,c.z)); } template __forceinline Vec3 msub ( const Vec3& a, const Vec3& b, const Vec3& c) { return Vec3( msub(a.x,b.x,c.x), msub(a.y,b.y,c.y), msub(a.z,b.z,c.z)); } template __forceinline Vec3 nmadd ( const Vec3& a, const Vec3& b, const Vec3& c) { return Vec3(nmadd(a.x,b.x,c.x),nmadd(a.y,b.y,c.y),nmadd(a.z,b.z,c.z));} template __forceinline Vec3 nmsub ( const Vec3& a, const Vec3& b, const Vec3& c) { return Vec3(nmsub(a.x,b.x,c.x),nmsub(a.y,b.y,c.y),nmsub(a.z,b.z,c.z)); } template __forceinline Vec3 madd ( const T& a, const Vec3& b, const Vec3& c) { return Vec3( madd(a,b.x,c.x), madd(a,b.y,c.y), madd(a,b.z,c.z)); } template __forceinline Vec3 msub ( const T& a, const Vec3& b, const Vec3& c) { return Vec3( msub(a,b.x,c.x), msub(a,b.y,c.y), msub(a,b.z,c.z)); } template __forceinline Vec3 nmadd ( const T& a, const Vec3& b, const Vec3& c) { return Vec3(nmadd(a,b.x,c.x),nmadd(a,b.y,c.y),nmadd(a,b.z,c.z));} template __forceinline Vec3 nmsub ( const T& a, const Vec3& b, const Vec3& c) { return Vec3(nmsub(a,b.x,c.x),nmsub(a,b.y,c.y),nmsub(a,b.z,c.z)); } //////////////////////////////////////////////////////////////////////////////// /// Assignment Operators //////////////////////////////////////////////////////////////////////////////// template __forceinline Vec3& operator +=( Vec3& a, const T b ) { a.x += b; a.y += b; a.z += b; return a; } template __forceinline Vec3& operator +=( Vec3& a, const Vec3& b ) { a.x += b.x; a.y += b.y; a.z += b.z; return a; } template __forceinline Vec3& operator -=( Vec3& a, const Vec3& b ) { a.x -= b.x; a.y -= b.y; a.z -= b.z; return a; } template __forceinline Vec3& operator *=( Vec3& a, const T& b ) { a.x *= b ; a.y *= b ; a.z *= b ; return a; } template __forceinline Vec3& operator /=( Vec3& a, const T& b ) { a.x /= b ; a.y /= b ; a.z /= b ; return a; } //////////////////////////////////////////////////////////////////////////////// /// Reduction Operators 
//////////////////////////////////////////////////////////////////////////////// template __forceinline T reduce_add( const Vec3& a ) { return a.x + a.y + a.z; } template __forceinline T reduce_mul( const Vec3& a ) { return a.x * a.y * a.z; } template __forceinline T reduce_min( const Vec3& a ) { return min(a.x, a.y, a.z); } template __forceinline T reduce_max( const Vec3& a ) { return max(a.x, a.y, a.z); } //////////////////////////////////////////////////////////////////////////////// /// Comparison Operators //////////////////////////////////////////////////////////////////////////////// template __forceinline bool operator ==( const Vec3& a, const Vec3& b ) { return a.x == b.x && a.y == b.y && a.z == b.z; } template __forceinline bool operator !=( const Vec3& a, const Vec3& b ) { return a.x != b.x || a.y != b.y || a.z != b.z; } template __forceinline bool operator < ( const Vec3& a, const Vec3& b ) { if (a.x != b.x) return a.x < b.x; if (a.y != b.y) return a.y < b.y; if (a.z != b.z) return a.z < b.z; return false; } //////////////////////////////////////////////////////////////////////////////// /// Shift Operators //////////////////////////////////////////////////////////////////////////////// template __forceinline Vec3 shift_right_1( const Vec3& a ) { return Vec3(shift_right_1(a.x),shift_right_1(a.y),shift_right_1(a.z)); } //////////////////////////////////////////////////////////////////////////////// /// Select //////////////////////////////////////////////////////////////////////////////// template __forceinline Vec3 select ( bool s, const Vec3& t, const Vec3& f ) { return Vec3(select(s,t.x,f.x),select(s,t.y,f.y),select(s,t.z,f.z)); } template __forceinline Vec3 select ( const Vec3& s, const Vec3& t, const Vec3& f ) { return Vec3(select(s.x,t.x,f.x),select(s.y,t.y,f.y),select(s.z,t.z,f.z)); } template __forceinline Vec3 select ( const typename T::Bool& s, const Vec3& t, const Vec3& f ) { return Vec3(select(s,t.x,f.x),select(s,t.y,f.y),select(s,t.z,f.z)); 
} template __forceinline Vec3 lerp(const Vec3& v0, const Vec3& v1, const T& t) { return madd(Vec3(T(1.0f)-t),v0,t*v1); } template __forceinline int maxDim ( const Vec3& a ) { const Vec3 b = abs(a); if (b.x > b.y) { if (b.x > b.z) return 0; else return 2; } else { if (b.y > b.z) return 1; else return 2; } } //////////////////////////////////////////////////////////////////////////////// /// Comparison Operators //////////////////////////////////////////////////////////////////////////////// template __forceinline Vec3 eq_mask( const Vec3& a, const Vec3& b ) { return Vec3(a.x==b.x,a.y==b.y,a.z==b.z); } template __forceinline Vec3 neq_mask(const Vec3& a, const Vec3& b ) { return Vec3(a.x!=b.x,a.y!=b.y,a.z!=b.z); } template __forceinline Vec3 lt_mask( const Vec3& a, const Vec3& b ) { return Vec3(a.x< b.x,a.y< b.y,a.z< b.z); } template __forceinline Vec3 le_mask( const Vec3& a, const Vec3& b ) { return Vec3(a.x<=b.x,a.y<=b.y,a.z<=b.z); } template __forceinline Vec3 gt_mask( const Vec3& a, const Vec3& b ) { return Vec3(a.x> b.x,a.y> b.y,a.z> b.z); } template __forceinline Vec3 ge_mask( const Vec3& a, const Vec3& b ) { return Vec3(a.x>=b.x,a.y>=b.y,a.z>=b.z); } //////////////////////////////////////////////////////////////////////////////// /// Euclidean Space Operators //////////////////////////////////////////////////////////////////////////////// template __forceinline T sqr ( const Vec3& a ) { return dot(a,a); } template __forceinline T dot ( const Vec3& a, const Vec3& b ) { return madd(a.x,b.x,madd(a.y,b.y,a.z*b.z)); } template __forceinline T length ( const Vec3& a ) { return sqrt(sqr(a)); } template __forceinline T rcp_length( const Vec3& a ) { return rsqrt(sqr(a)); } template __forceinline Vec3 normalize( const Vec3& a ) { return a*rsqrt(sqr(a)); } template __forceinline T distance ( const Vec3& a, const Vec3& b ) { return length(a-b); } template __forceinline Vec3 cross ( const Vec3& a, const Vec3& b ) { return Vec3(msub(a.y,b.z,a.z*b.y), msub(a.z,b.x,a.x*b.z), 
msub(a.x,b.y,a.y*b.x)); } template __forceinline Vec3 stable_triangle_normal( const Vec3& a, const Vec3& b, const Vec3& c ) { const T ab_x = a.z*b.y, ab_y = a.x*b.z, ab_z = a.y*b.x; const T bc_x = b.z*c.y, bc_y = b.x*c.z, bc_z = b.y*c.x; const Vec3 cross_ab(msub(a.y,b.z,ab_x), msub(a.z,b.x,ab_y), msub(a.x,b.y,ab_z)); const Vec3 cross_bc(msub(b.y,c.z,bc_x), msub(b.z,c.x,bc_y), msub(b.x,c.y,bc_z)); const auto sx = abs(ab_x) < abs(bc_x); const auto sy = abs(ab_y) < abs(bc_y); const auto sz = abs(ab_z) < abs(bc_z); return Vec3(select(sx,cross_ab.x,cross_bc.x), select(sy,cross_ab.y,cross_bc.y), select(sz,cross_ab.z,cross_bc.z)); } template __forceinline T sum ( const Vec3& a ) { return a.x+a.y+a.z; } template __forceinline T halfArea ( const Vec3& d ) { return madd(d.x,(d.y+d.z),d.y*d.z); } template __forceinline T area ( const Vec3& d ) { return 2.0f*halfArea(d); } template __forceinline Vec3 normalize_safe( const Vec3& a ) { const T d = dot(a,a); return select(d == T( zero ), a , a*rsqrt(d) ); } template __forceinline T sqr_point_to_line_distance(const Vec3& P, const Vec3& Q0, const Vec3& Q1) { const Vec3 N = cross(P-Q0,Q1-Q0); const Vec3 D = Q1-Q0; return dot(N,N)*rcp(dot(D,D)); } template __forceinline T sqr_point_to_line_distance(const Vec3& PmQ0, const Vec3& Q1mQ0) { const Vec3 N = cross(PmQ0,Q1mQ0); const Vec3 D = Q1mQ0; return dot(N,N)*rcp(dot(D,D)); } //////////////////////////////////////////////////////////////////////////////// /// Output Operators //////////////////////////////////////////////////////////////////////////////// template __forceinline embree_ostream operator<<(embree_ostream cout, const Vec3& a) { return cout << "(" << a.x << ", " << a.y << ", " << a.z << ")"; } typedef Vec3 Vec3b; typedef Vec3 Vec3i; typedef Vec3 Vec3f; } #include "vec3ba.h" #include "vec3ia.h" #include "vec3fa.h" #include "../simd/sse.h" namespace embree { template<> __forceinline Vec3::Vec3(const Vec3fa& a) { x = a.x; y = a.y; z = a.z; } } 
level-zero-raytracing-support-1.2.3/rtbuild/math/vec3ba.h000066400000000000000000000127561514453371700234130ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "../sys/alloc.h" #include "emath.h" #if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__) # include "vec3ba_sycl.h" #else #include "../simd/sse.h" namespace embree { //////////////////////////////////////////////////////////////////////////////// /// SSE Vec3ba Type //////////////////////////////////////////////////////////////////////////////// struct __aligned(16) Vec3ba { ALIGNED_STRUCT_(16); union { __m128 m128; struct { int x,y,z; }; }; typedef int Scalar; enum { N = 3 }; //////////////////////////////////////////////////////////////////////////////// /// Constructors, Assignment & Cast Operators //////////////////////////////////////////////////////////////////////////////// __forceinline Vec3ba( ) {} __forceinline Vec3ba( const __m128 input ) : m128(input) {} __forceinline Vec3ba( const Vec3ba& other ) : m128(other.m128) {} __forceinline Vec3ba& operator =(const Vec3ba& other) { m128 = other.m128; return *this; } __forceinline explicit Vec3ba( bool a ) : m128(mm_lookupmask_ps[(size_t(a) << 3) | (size_t(a) << 2) | (size_t(a) << 1) | size_t(a)]) {} __forceinline Vec3ba( bool a, bool b, bool c) : m128(mm_lookupmask_ps[(size_t(c) << 2) | (size_t(b) << 1) | size_t(a)]) {} __forceinline operator const __m128&() const { return m128; } __forceinline operator __m128&() { return m128; } //////////////////////////////////////////////////////////////////////////////// /// Constants //////////////////////////////////////////////////////////////////////////////// __forceinline Vec3ba( FalseTy ) : m128(_mm_setzero_ps()) {} __forceinline Vec3ba( TrueTy ) : m128(_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()))) {} //////////////////////////////////////////////////////////////////////////////// /// 
Array Access //////////////////////////////////////////////////////////////////////////////// __forceinline const int& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; } __forceinline int& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; } }; //////////////////////////////////////////////////////////////////////////////// /// Unary Operators //////////////////////////////////////////////////////////////////////////////// __forceinline Vec3ba operator !( const Vec3ba& a ) { return _mm_xor_ps(a.m128, Vec3ba(embree::True)); } //////////////////////////////////////////////////////////////////////////////// /// Binary Operators //////////////////////////////////////////////////////////////////////////////// __forceinline Vec3ba operator &( const Vec3ba& a, const Vec3ba& b ) { return _mm_and_ps(a.m128, b.m128); } __forceinline Vec3ba operator |( const Vec3ba& a, const Vec3ba& b ) { return _mm_or_ps (a.m128, b.m128); } __forceinline Vec3ba operator ^( const Vec3ba& a, const Vec3ba& b ) { return _mm_xor_ps(a.m128, b.m128); } //////////////////////////////////////////////////////////////////////////////// /// Assignment Operators //////////////////////////////////////////////////////////////////////////////// __forceinline Vec3ba& operator &=( Vec3ba& a, const Vec3ba& b ) { return a = a & b; } __forceinline Vec3ba& operator |=( Vec3ba& a, const Vec3ba& b ) { return a = a | b; } __forceinline Vec3ba& operator ^=( Vec3ba& a, const Vec3ba& b ) { return a = a ^ b; } //////////////////////////////////////////////////////////////////////////////// /// Comparison Operators + Select //////////////////////////////////////////////////////////////////////////////// __forceinline bool operator ==( const Vec3ba& a, const Vec3ba& b ) { return (_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(a.m128), _mm_castps_si128(b.m128)))) & 7) == 7; } __forceinline bool operator !=( const Vec3ba& a, const Vec3ba& b ) { 
return (_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(a.m128), _mm_castps_si128(b.m128)))) & 7) != 7; } __forceinline bool operator < ( const Vec3ba& a, const Vec3ba& b ) { if (a.x != b.x) return a.x < b.x; if (a.y != b.y) return a.y < b.y; if (a.z != b.z) return a.z < b.z; return false; } //////////////////////////////////////////////////////////////////////////////// /// Reduction Operations //////////////////////////////////////////////////////////////////////////////// __forceinline bool reduce_and( const Vec3ba& a ) { return (_mm_movemask_ps(a) & 0x7) == 0x7; } __forceinline bool reduce_or ( const Vec3ba& a ) { return (_mm_movemask_ps(a) & 0x7) != 0x0; } __forceinline bool all ( const Vec3ba& b ) { return (_mm_movemask_ps(b) & 0x7) == 0x7; } __forceinline bool any ( const Vec3ba& b ) { return (_mm_movemask_ps(b) & 0x7) != 0x0; } __forceinline bool none ( const Vec3ba& b ) { return (_mm_movemask_ps(b) & 0x7) == 0x0; } __forceinline size_t movemask(const Vec3ba& a) { return _mm_movemask_ps(a) & 0x7; } //////////////////////////////////////////////////////////////////////////////// /// Output Operators //////////////////////////////////////////////////////////////////////////////// __forceinline embree_ostream operator<<(embree_ostream cout, const Vec3ba& a) { return cout << "(" << (a.x ? "1" : "0") << ", " << (a.y ? "1" : "0") << ", " << (a.z ? 
"1" : "0") << ")"; } } #endif level-zero-raytracing-support-1.2.3/rtbuild/math/vec3fa.h000066400000000000000000000740111514453371700234070ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "../sys/alloc.h" #include "emath.h" #if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__) # include "vec3fa_sycl.h" #else #include "../simd/sse.h" namespace embree { //////////////////////////////////////////////////////////////////////////////// /// SSE Vec3fa Type //////////////////////////////////////////////////////////////////////////////// struct __aligned(16) Vec3fa { ALIGNED_STRUCT_(16); typedef float Scalar; enum { N = 3 }; union { __m128 m128; struct { float x,y,z; }; }; //////////////////////////////////////////////////////////////////////////////// /// Constructors, Assignment & Cast Operators //////////////////////////////////////////////////////////////////////////////// __forceinline Vec3fa( ) {} __forceinline Vec3fa( const __m128 a ) : m128(a) {} __forceinline Vec3fa ( const Vec3& other ) { m128 = _mm_set_ps(0, other.z, other.y, other.x); } //__forceinline Vec3fa& operator =( const Vec3& other ) { m128 = _mm_set_ps(0, other.z, other.y, other.x); return *this; } __forceinline Vec3fa ( const Vec3fa& other ) { m128 = other.m128; } __forceinline Vec3fa& operator =( const Vec3fa& other ) { m128 = other.m128; return *this; } __forceinline explicit Vec3fa( const float a ) : m128(_mm_set1_ps(a)) {} __forceinline Vec3fa( const float x, const float y, const float z) : m128(_mm_set_ps(0, z, y, x)) {} __forceinline explicit Vec3fa( const __m128i a ) : m128(_mm_cvtepi32_ps(a)) {} __forceinline explicit operator const vfloat4() const { return vfloat4(m128); } __forceinline explicit operator const vint4() const { return vint4(_mm_cvtps_epi32(m128)); } //__forceinline explicit operator const Vec2fa() const { return Vec2fa(m128); } __forceinline explicit operator const Vec3ia() const { 
return Vec3ia(_mm_cvtps_epi32(m128)); } //__forceinline operator const __m128&() const { return m128; } //__forceinline operator __m128&() { return m128; } //////////////////////////////////////////////////////////////////////////////// /// Loads and Stores //////////////////////////////////////////////////////////////////////////////// static __forceinline Vec3fa load( const void* const a ) { return Vec3fa(_mm_and_ps(_mm_load_ps((float*)a),_mm_castsi128_ps(_mm_set_epi32(0, -1, -1, -1)))); } static __forceinline Vec3fa loadu( const void* const a ) { return Vec3fa(_mm_loadu_ps((float*)a)); } static __forceinline void storeu ( void* ptr, const Vec3fa& v ) { _mm_storeu_ps((float*)ptr,v.m128); } //////////////////////////////////////////////////////////////////////////////// /// Constants //////////////////////////////////////////////////////////////////////////////// __forceinline Vec3fa( ZeroTy ) : m128(_mm_setzero_ps()) {} __forceinline Vec3fa( OneTy ) : m128(_mm_set1_ps(1.0f)) {} __forceinline Vec3fa( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {} __forceinline Vec3fa( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {} //////////////////////////////////////////////////////////////////////////////// /// Array Access //////////////////////////////////////////////////////////////////////////////// __forceinline const float& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; } __forceinline float& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; } }; //////////////////////////////////////////////////////////////////////////////// /// Unary Operators //////////////////////////////////////////////////////////////////////////////// __forceinline Vec3fa operator +( const Vec3fa& a ) { return a; } __forceinline Vec3fa operator -( const Vec3fa& a ) { const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000)); return _mm_xor_ps(a.m128, mask); } __forceinline Vec3fa abs ( const Vec3fa& a ) { const __m128 mask = 
_mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); return _mm_and_ps(a.m128, mask); } __forceinline Vec3fa sign ( const Vec3fa& a ) { return blendv_ps(Vec3fa(one).m128, (-Vec3fa(one)).m128, _mm_cmplt_ps (a.m128,Vec3fa(zero).m128)); } __forceinline Vec3fa rcp ( const Vec3fa& a ) { const Vec3fa r = _mm_rcp_ps(a.m128); const Vec3fa h_n = _mm_sub_ps(vfloat4(1.0f), _mm_mul_ps(a.m128, r.m128)); // First, compute 1 - a * r (which will be very close to 0) const Vec3fa res = _mm_add_ps(r.m128,_mm_mul_ps(r.m128, h_n.m128)); // Then compute r + r * h_n return res; } __forceinline Vec3fa sqrt ( const Vec3fa& a ) { return _mm_sqrt_ps(a.m128); } __forceinline Vec3fa sqr ( const Vec3fa& a ) { return _mm_mul_ps(a.m128,a.m128); } __forceinline Vec3fa rsqrt( const Vec3fa& a ) { __m128 r = _mm_rsqrt_ps(a.m128); return _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f),r), _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a.m128, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r))); } __forceinline Vec3fa zero_fix(const Vec3fa& a) { return blendv_ps(a.m128, _mm_set1_ps(min_rcp_input), _mm_cmplt_ps (abs(a).m128, _mm_set1_ps(min_rcp_input))); } __forceinline Vec3fa rcp_safe(const Vec3fa& a) { return rcp(zero_fix(a)); } __forceinline Vec3fa log ( const Vec3fa& a ) { return Vec3fa(logf(a.x),logf(a.y),logf(a.z)); } __forceinline Vec3fa exp ( const Vec3fa& a ) { return Vec3fa(expf(a.x),expf(a.y),expf(a.z)); } //////////////////////////////////////////////////////////////////////////////// /// Binary Operators //////////////////////////////////////////////////////////////////////////////// __forceinline Vec3fa operator +( const Vec3fa& a, const Vec3fa& b ) { return _mm_add_ps(a.m128, b.m128); } __forceinline Vec3fa operator -( const Vec3fa& a, const Vec3fa& b ) { return _mm_sub_ps(a.m128, b.m128); } __forceinline Vec3fa operator *( const Vec3fa& a, const Vec3fa& b ) { return _mm_mul_ps(a.m128, b.m128); } __forceinline Vec3fa operator *( const Vec3fa& a, const float b ) { return a * Vec3fa(b); } __forceinline Vec3fa operator *( const 
float a, const Vec3fa& b ) { return Vec3fa(a) * b; } __forceinline Vec3fa operator /( const Vec3fa& a, const Vec3fa& b ) { return _mm_div_ps(a.m128,b.m128); } __forceinline Vec3fa operator /( const Vec3fa& a, const float b ) { return _mm_div_ps(a.m128,_mm_set1_ps(b)); } __forceinline Vec3fa operator /( const float a, const Vec3fa& b ) { return _mm_div_ps(_mm_set1_ps(a),b.m128); } __forceinline Vec3fa min( const Vec3fa& a, const Vec3fa& b ) { return _mm_min_ps(a.m128,b.m128); } __forceinline Vec3fa max( const Vec3fa& a, const Vec3fa& b ) { return _mm_max_ps(a.m128,b.m128); } //////////////////////////////////////////////////////////////////////////////// /// Ternary Operators //////////////////////////////////////////////////////////////////////////////// __forceinline Vec3fa madd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return a*b+c; } __forceinline Vec3fa nmadd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return -a*b+c;} __forceinline Vec3fa nmsub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return -a*b-c; } __forceinline Vec3fa msub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return a*b-c; } __forceinline Vec3fa madd ( const float a, const Vec3fa& b, const Vec3fa& c) { return madd(Vec3fa(a),b,c); } __forceinline Vec3fa msub ( const float a, const Vec3fa& b, const Vec3fa& c) { return msub(Vec3fa(a),b,c); } __forceinline Vec3fa nmadd ( const float a, const Vec3fa& b, const Vec3fa& c) { return nmadd(Vec3fa(a),b,c); } __forceinline Vec3fa nmsub ( const float a, const Vec3fa& b, const Vec3fa& c) { return nmsub(Vec3fa(a),b,c); } //////////////////////////////////////////////////////////////////////////////// /// Assignment Operators //////////////////////////////////////////////////////////////////////////////// __forceinline Vec3fa& operator +=( Vec3fa& a, const Vec3fa& b ) { return a = a + b; } __forceinline Vec3fa& operator -=( Vec3fa& a, const Vec3fa& b ) { return a = a - b; } __forceinline Vec3fa& operator *=( Vec3fa& 
a, const Vec3fa& b ) { return a = a * b; } __forceinline Vec3fa& operator *=( Vec3fa& a, const float b ) { return a = a * b; } __forceinline Vec3fa& operator /=( Vec3fa& a, const Vec3fa& b ) { return a = a / b; } __forceinline Vec3fa& operator /=( Vec3fa& a, const float b ) { return a = a / b; } //////////////////////////////////////////////////////////////////////////////// /// Reductions //////////////////////////////////////////////////////////////////////////////// __forceinline float reduce_add(const Vec3fa& v) { const vfloat4 a(v.m128); const vfloat4 b = shuffle<1>(a); const vfloat4 c = shuffle<2>(a); return _mm_cvtss_f32(a+b+c); } __forceinline float reduce_mul(const Vec3fa& v) { return v.x*v.y*v.z; } __forceinline float reduce_min(const Vec3fa& v) { return min(v.x,v.y,v.z); } __forceinline float reduce_max(const Vec3fa& v) { return max(v.x,v.y,v.z); } //////////////////////////////////////////////////////////////////////////////// /// Comparison Operators //////////////////////////////////////////////////////////////////////////////// __forceinline bool operator ==( const Vec3fa& a, const Vec3fa& b ) { return (_mm_movemask_ps(_mm_cmpeq_ps (a.m128, b.m128)) & 7) == 7; } __forceinline bool operator !=( const Vec3fa& a, const Vec3fa& b ) { return (_mm_movemask_ps(_mm_cmpneq_ps(a.m128, b.m128)) & 7) != 0; } __forceinline Vec3ba eq_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmpeq_ps (a.m128, b.m128); } __forceinline Vec3ba neq_mask(const Vec3fa& a, const Vec3fa& b ) { return _mm_cmpneq_ps(a.m128, b.m128); } __forceinline Vec3ba lt_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmplt_ps (a.m128, b.m128); } __forceinline Vec3ba le_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmple_ps (a.m128, b.m128); } __forceinline Vec3ba gt_mask(const Vec3fa& a, const Vec3fa& b) { return _mm_cmpnle_ps(a.m128, b.m128); } __forceinline Vec3ba ge_mask(const Vec3fa& a, const Vec3fa& b) { return _mm_cmpnlt_ps(a.m128, b.m128); } __forceinline bool isvalid ( 
const Vec3fa& v ) { return all(gt_mask(v,Vec3fa(-FLT_LARGE)) & lt_mask(v,Vec3fa(+FLT_LARGE))); } __forceinline bool is_finite ( const Vec3fa& a ) { return all(ge_mask(a,Vec3fa(-FLT_MAX)) & le_mask(a,Vec3fa(+FLT_MAX))); } __forceinline bool isvalid4 ( const Vec3fa& v ) { return all((vfloat4(v.m128) > vfloat4(-FLT_LARGE)) & (vfloat4(v.m128) < vfloat4(+FLT_LARGE))); } __forceinline bool is_finite4 ( const Vec3fa& a ) { return all((vfloat4(a.m128) >= vfloat4(-FLT_MAX)) & (vfloat4(a.m128) <= vfloat4(+FLT_MAX))); } //////////////////////////////////////////////////////////////////////////////// /// Euclidean Space Operators //////////////////////////////////////////////////////////////////////////////// #if defined(__SSE4_1__) __forceinline float dot ( const Vec3fa& a, const Vec3fa& b ) { return _mm_cvtss_f32(_mm_dp_ps(a.m128,b.m128,0x7F)); } #else __forceinline float dot ( const Vec3fa& a, const Vec3fa& b ) { return reduce_add(a*b); } #endif __forceinline Vec3fa cross ( const Vec3fa& a, const Vec3fa& b ) { vfloat4 a0 = vfloat4(a.m128); vfloat4 b0 = shuffle<1,2,0,3>(vfloat4(b.m128)); vfloat4 a1 = shuffle<1,2,0,3>(vfloat4(a.m128)); vfloat4 b1 = vfloat4(b.m128); return Vec3fa(shuffle<1,2,0,3>(msub(a0,b0,a1*b1))); } __forceinline float sqr_length ( const Vec3fa& a ) { return dot(a,a); } __forceinline float rcp_length ( const Vec3fa& a ) { return rsqrt(dot(a,a)); } __forceinline float rcp_length2( const Vec3fa& a ) { return rcp(dot(a,a)); } __forceinline float length ( const Vec3fa& a ) { return sqrt(dot(a,a)); } __forceinline Vec3fa normalize( const Vec3fa& a ) { return a*rsqrt(dot(a,a)); } __forceinline float distance ( const Vec3fa& a, const Vec3fa& b ) { return length(a-b); } __forceinline float halfArea ( const Vec3fa& d ) { return madd(d.x,(d.y+d.z),d.y*d.z); } __forceinline float area ( const Vec3fa& d ) { return 2.0f*halfArea(d); } __forceinline Vec3fa normalize_safe( const Vec3fa& a ) { const float d = dot(a,a); if (unlikely(d == 0.0f)) return a; else return 
a*rsqrt(d); } /*! differentiated normalization */ __forceinline Vec3fa dnormalize(const Vec3fa& p, const Vec3fa& dp) { const float pp = dot(p,p); const float pdp = dot(p,dp); return (pp*dp-pdp*p)*rcp(pp)*rsqrt(pp); } //////////////////////////////////////////////////////////////////////////////// /// Select //////////////////////////////////////////////////////////////////////////////// __forceinline Vec3fa select( bool s, const Vec3fa& t, const Vec3fa& f ) { __m128 mask = s ? _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())) : _mm_setzero_ps(); return blendv_ps(f.m128, t.m128, mask); } __forceinline Vec3fa select( const Vec3ba& s, const Vec3fa& t, const Vec3fa& f ) { return blendv_ps(f.m128, t.m128, s); } __forceinline Vec3fa lerp(const Vec3fa& v0, const Vec3fa& v1, const float t) { return madd(1.0f-t,v0,t*v1); } __forceinline int maxDim ( const Vec3fa& a ) { const Vec3fa b = abs(a); if (b.x > b.y) { if (b.x > b.z) return 0; else return 2; } else { if (b.y > b.z) return 1; else return 2; } } //////////////////////////////////////////////////////////////////////////////// /// Rounding Functions //////////////////////////////////////////////////////////////////////////////// #if defined (__SSE4_1__) __forceinline Vec3fa trunc( const Vec3fa& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_NEAREST_INT); } __forceinline Vec3fa floor( const Vec3fa& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_NEG_INF ); } __forceinline Vec3fa ceil ( const Vec3fa& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_POS_INF ); } #else __forceinline Vec3fa trunc( const Vec3fa& a ) { return Vec3fa(truncf(a.x),truncf(a.y),truncf(a.z)); } __forceinline Vec3fa floor( const Vec3fa& a ) { return Vec3fa(floorf(a.x),floorf(a.y),floorf(a.z)); } __forceinline Vec3fa ceil ( const Vec3fa& a ) { return Vec3fa(ceilf (a.x),ceilf (a.y),ceilf (a.z)); } #endif //////////////////////////////////////////////////////////////////////////////// /// Output Operators 
//////////////////////////////////////////////////////////////////////////////// __forceinline embree_ostream operator<<(embree_ostream cout, const Vec3fa& a) { return cout << "(" << a.x << ", " << a.y << ", " << a.z << ")"; } typedef Vec3fa Vec3fa_t; //////////////////////////////////////////////////////////////////////////////// /// SSE Vec3fx Type //////////////////////////////////////////////////////////////////////////////// struct __aligned(16) Vec3fx { ALIGNED_STRUCT_(16); typedef float Scalar; enum { N = 3 }; union { __m128 m128; struct { float x,y,z; union { int a; unsigned u; float w; }; }; }; //////////////////////////////////////////////////////////////////////////////// /// Constructors, Assignment & Cast Operators //////////////////////////////////////////////////////////////////////////////// __forceinline Vec3fx( ) {} __forceinline Vec3fx( const __m128 a ) : m128(a) {} __forceinline explicit Vec3fx(const Vec3fa& v) : m128(v.m128) {} __forceinline operator Vec3fa () const { return Vec3fa(m128); } __forceinline explicit Vec3fx ( const Vec3& other ) { m128 = _mm_set_ps(0, other.z, other.y, other.x); } //__forceinline Vec3fx& operator =( const Vec3& other ) { m128 = _mm_set_ps(0, other.z, other.y, other.x); return *this; } __forceinline Vec3fx ( const Vec3fx& other ) { m128 = other.m128; } __forceinline Vec3fx& operator =( const Vec3fx& other ) { m128 = other.m128; return *this; } __forceinline explicit Vec3fx( const float a ) : m128(_mm_set1_ps(a)) {} __forceinline Vec3fx( const float x, const float y, const float z) : m128(_mm_set_ps(0, z, y, x)) {} __forceinline Vec3fx( const Vec3fa& other, const int a1) { m128 = other.m128; a = a1; } __forceinline Vec3fx( const Vec3fa& other, const unsigned a1) { m128 = other.m128; u = a1; } __forceinline Vec3fx( const Vec3fa& other, const float w1) { #if defined (__SSE4_1__) m128 = _mm_insert_ps(other.m128, _mm_set_ss(w1),3 << 4); #else const vint4 mask(-1,-1,-1,0); m128 = 
select(vboolf4(_mm_castsi128_ps(mask)),vfloat4(other.m128),vfloat4(w1)); #endif } //__forceinline Vec3fx( const float x, const float y, const float z, const int a) : x(x), y(y), z(z), a(a) {} // not working properly! //__forceinline Vec3fx( const float x, const float y, const float z, const unsigned a) : x(x), y(y), z(z), u(a) {} // not working properly! __forceinline Vec3fx( const float x, const float y, const float z, const float w) : m128(_mm_set_ps(w, z, y, x)) {} //__forceinline explicit Vec3fx( const __m128i a ) : m128(_mm_cvtepi32_ps(a)) {} __forceinline explicit operator const vfloat4() const { return vfloat4(m128); } __forceinline explicit operator const vint4() const { return vint4(_mm_cvtps_epi32(m128)); } //__forceinline explicit operator const Vec2fa() const { return Vec2fa(m128); } __forceinline explicit operator const Vec3ia() const { return Vec3ia(_mm_cvtps_epi32(m128)); } //__forceinline operator const __m128&() const { return m128; } //__forceinline operator __m128&() { return m128; } //////////////////////////////////////////////////////////////////////////////// /// Loads and Stores //////////////////////////////////////////////////////////////////////////////// static __forceinline Vec3fx load( const void* const a ) { return Vec3fx(_mm_and_ps(_mm_load_ps((float*)a),_mm_castsi128_ps(_mm_set_epi32(0, -1, -1, -1)))); } static __forceinline Vec3fx loadu( const void* const a ) { return Vec3fx(_mm_loadu_ps((float*)a)); } static __forceinline void storeu ( void* ptr, const Vec3fx& v ) { _mm_storeu_ps((float*)ptr,v.m128); } //////////////////////////////////////////////////////////////////////////////// /// Constants //////////////////////////////////////////////////////////////////////////////// __forceinline Vec3fx( ZeroTy ) : m128(_mm_setzero_ps()) {} __forceinline Vec3fx( OneTy ) : m128(_mm_set1_ps(1.0f)) {} __forceinline Vec3fx( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {} __forceinline Vec3fx( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {} 
//////////////////////////////////////////////////////////////////////////////// /// Array Access //////////////////////////////////////////////////////////////////////////////// __forceinline const float& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; } __forceinline float& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; } }; //////////////////////////////////////////////////////////////////////////////// /// Unary Operators //////////////////////////////////////////////////////////////////////////////// __forceinline Vec3fx operator +( const Vec3fx& a ) { return a; } __forceinline Vec3fx operator -( const Vec3fx& a ) { const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000)); return _mm_xor_ps(a.m128, mask); } __forceinline Vec3fx abs ( const Vec3fx& a ) { const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); return _mm_and_ps(a.m128, mask); } __forceinline Vec3fx sign ( const Vec3fx& a ) { return blendv_ps(Vec3fx(one).m128, (-Vec3fx(one)).m128, _mm_cmplt_ps (a.m128,Vec3fx(zero).m128)); } __forceinline Vec3fx rcp ( const Vec3fx& a ) { const Vec3fx r = _mm_rcp_ps(a.m128); const Vec3fx res = _mm_mul_ps(r.m128,_mm_sub_ps(vfloat4(2.0f), _mm_mul_ps(r.m128, a.m128))); return res; } __forceinline Vec3fx sqrt ( const Vec3fx& a ) { return _mm_sqrt_ps(a.m128); } __forceinline Vec3fx sqr ( const Vec3fx& a ) { return _mm_mul_ps(a.m128,a.m128); } __forceinline Vec3fx rsqrt( const Vec3fx& a ) { __m128 r = _mm_rsqrt_ps(a.m128); return _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f),r), _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a.m128, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r))); } __forceinline Vec3fx zero_fix(const Vec3fx& a) { return blendv_ps(a.m128, _mm_set1_ps(min_rcp_input), _mm_cmplt_ps (abs(a).m128, _mm_set1_ps(min_rcp_input))); } __forceinline Vec3fx rcp_safe(const Vec3fx& a) { return rcp(zero_fix(a)); } __forceinline Vec3fx log ( const Vec3fx& a ) { return Vec3fx(logf(a.x),logf(a.y),logf(a.z)); } 
__forceinline Vec3fx exp ( const Vec3fx& a ) { return Vec3fx(expf(a.x),expf(a.y),expf(a.z)); } //////////////////////////////////////////////////////////////////////////////// /// Binary Operators //////////////////////////////////////////////////////////////////////////////// __forceinline Vec3fx operator +( const Vec3fx& a, const Vec3fx& b ) { return _mm_add_ps(a.m128, b.m128); } __forceinline Vec3fx operator -( const Vec3fx& a, const Vec3fx& b ) { return _mm_sub_ps(a.m128, b.m128); } __forceinline Vec3fx operator *( const Vec3fx& a, const Vec3fx& b ) { return _mm_mul_ps(a.m128, b.m128); } __forceinline Vec3fx operator *( const Vec3fx& a, const float b ) { return a * Vec3fx(b); } __forceinline Vec3fx operator *( const float a, const Vec3fx& b ) { return Vec3fx(a) * b; } __forceinline Vec3fx operator /( const Vec3fx& a, const Vec3fx& b ) { return _mm_div_ps(a.m128,b.m128); } __forceinline Vec3fx operator /( const Vec3fx& a, const float b ) { return _mm_div_ps(a.m128,_mm_set1_ps(b)); } __forceinline Vec3fx operator /( const float a, const Vec3fx& b ) { return _mm_div_ps(_mm_set1_ps(a),b.m128); } __forceinline Vec3fx min( const Vec3fx& a, const Vec3fx& b ) { return _mm_min_ps(a.m128,b.m128); } __forceinline Vec3fx max( const Vec3fx& a, const Vec3fx& b ) { return _mm_max_ps(a.m128,b.m128); } //////////////////////////////////////////////////////////////////////////////// /// Ternary Operators //////////////////////////////////////////////////////////////////////////////// __forceinline Vec3fx madd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return a*b+c; } __forceinline Vec3fx msub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return a*b-c; } __forceinline Vec3fx nmadd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return -a*b+c;} __forceinline Vec3fx nmsub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return -a*b-c; } __forceinline Vec3fx madd ( const float a, const Vec3fx& b, const Vec3fx& c) { return madd(Vec3fx(a),b,c); } 
__forceinline Vec3fx msub ( const float a, const Vec3fx& b, const Vec3fx& c) { return msub(Vec3fx(a),b,c); } __forceinline Vec3fx nmadd ( const float a, const Vec3fx& b, const Vec3fx& c) { return nmadd(Vec3fx(a),b,c); } __forceinline Vec3fx nmsub ( const float a, const Vec3fx& b, const Vec3fx& c) { return nmsub(Vec3fx(a),b,c); } //////////////////////////////////////////////////////////////////////////////// /// Assignment Operators //////////////////////////////////////////////////////////////////////////////// __forceinline Vec3fx& operator +=( Vec3fx& a, const Vec3fx& b ) { return a = a + b; } __forceinline Vec3fx& operator -=( Vec3fx& a, const Vec3fx& b ) { return a = a - b; } __forceinline Vec3fx& operator *=( Vec3fx& a, const Vec3fx& b ) { return a = a * b; } __forceinline Vec3fx& operator *=( Vec3fx& a, const float b ) { return a = a * b; } __forceinline Vec3fx& operator /=( Vec3fx& a, const Vec3fx& b ) { return a = a / b; } __forceinline Vec3fx& operator /=( Vec3fx& a, const float b ) { return a = a / b; } //////////////////////////////////////////////////////////////////////////////// /// Reductions //////////////////////////////////////////////////////////////////////////////// __forceinline float reduce_add(const Vec3fx& v) { const vfloat4 a(v.m128); const vfloat4 b = shuffle<1>(a); const vfloat4 c = shuffle<2>(a); return _mm_cvtss_f32(a+b+c); } __forceinline float reduce_mul(const Vec3fx& v) { return v.x*v.y*v.z; } __forceinline float reduce_min(const Vec3fx& v) { return min(v.x,v.y,v.z); } __forceinline float reduce_max(const Vec3fx& v) { return max(v.x,v.y,v.z); } //////////////////////////////////////////////////////////////////////////////// /// Comparison Operators //////////////////////////////////////////////////////////////////////////////// __forceinline bool operator ==( const Vec3fx& a, const Vec3fx& b ) { return (_mm_movemask_ps(_mm_cmpeq_ps (a.m128, b.m128)) & 7) == 7; } __forceinline bool operator !=( const Vec3fx& a, const Vec3fx& b ) { 
return (_mm_movemask_ps(_mm_cmpneq_ps(a.m128, b.m128)) & 7) != 0; } __forceinline Vec3ba eq_mask( const Vec3fx& a, const Vec3fx& b ) { return _mm_cmpeq_ps (a.m128, b.m128); } __forceinline Vec3ba neq_mask(const Vec3fx& a, const Vec3fx& b ) { return _mm_cmpneq_ps(a.m128, b.m128); } __forceinline Vec3ba lt_mask( const Vec3fx& a, const Vec3fx& b ) { return _mm_cmplt_ps (a.m128, b.m128); } __forceinline Vec3ba le_mask( const Vec3fx& a, const Vec3fx& b ) { return _mm_cmple_ps (a.m128, b.m128); } __forceinline Vec3ba gt_mask( const Vec3fx& a, const Vec3fx& b ) { return _mm_cmpnle_ps(a.m128, b.m128); } __forceinline Vec3ba ge_mask( const Vec3fx& a, const Vec3fx& b ) { return _mm_cmpnlt_ps(a.m128, b.m128); } __forceinline bool isvalid ( const Vec3fx& v ) { return all(gt_mask(v,Vec3fx(-FLT_LARGE)) & lt_mask(v,Vec3fx(+FLT_LARGE))); } __forceinline bool is_finite ( const Vec3fx& a ) { return all(ge_mask(a,Vec3fx(-FLT_MAX)) & le_mask(a,Vec3fx(+FLT_MAX))); } __forceinline bool isvalid4 ( const Vec3fx& v ) { return all((vfloat4(v.m128) > vfloat4(-FLT_LARGE)) & (vfloat4(v.m128) < vfloat4(+FLT_LARGE))); } __forceinline bool is_finite4 ( const Vec3fx& a ) { return all((vfloat4(a.m128) >= vfloat4(-FLT_MAX)) & (vfloat4(a.m128) <= vfloat4(+FLT_MAX))); } //////////////////////////////////////////////////////////////////////////////// /// Euclidean Space Operators //////////////////////////////////////////////////////////////////////////////// #if defined(__SSE4_1__) __forceinline float dot ( const Vec3fx& a, const Vec3fx& b ) { return _mm_cvtss_f32(_mm_dp_ps(a.m128,b.m128,0x7F)); } #else __forceinline float dot ( const Vec3fx& a, const Vec3fx& b ) { return reduce_add(a*b); } #endif __forceinline Vec3fx cross ( const Vec3fx& a, const Vec3fx& b ) { vfloat4 a0 = vfloat4(a.m128); vfloat4 b0 = shuffle<1,2,0,3>(vfloat4(b.m128)); vfloat4 a1 = shuffle<1,2,0,3>(vfloat4(a.m128)); vfloat4 b1 = vfloat4(b.m128); return Vec3fx(shuffle<1,2,0,3>(msub(a0,b0,a1*b1))); } __forceinline float sqr_length ( 
const Vec3fx& a ) { return dot(a,a); } __forceinline float rcp_length ( const Vec3fx& a ) { return rsqrt(dot(a,a)); } __forceinline float rcp_length2( const Vec3fx& a ) { return rcp(dot(a,a)); } __forceinline float length ( const Vec3fx& a ) { return sqrt(dot(a,a)); } __forceinline Vec3fx normalize( const Vec3fx& a ) { return a*rsqrt(dot(a,a)); } __forceinline float distance ( const Vec3fx& a, const Vec3fx& b ) { return length(a-b); } __forceinline float halfArea ( const Vec3fx& d ) { return madd(d.x,(d.y+d.z),d.y*d.z); } __forceinline float area ( const Vec3fx& d ) { return 2.0f*halfArea(d); } __forceinline Vec3fx normalize_safe( const Vec3fx& a ) { const float d = dot(a,a); if (unlikely(d == 0.0f)) return a; else return a*rsqrt(d); } /*! differentiated normalization */ __forceinline Vec3fx dnormalize(const Vec3fx& p, const Vec3fx& dp) { const float pp = dot(p,p); const float pdp = dot(p,dp); return (pp*dp-pdp*p)*rcp(pp)*rsqrt(pp); } //////////////////////////////////////////////////////////////////////////////// /// Select //////////////////////////////////////////////////////////////////////////////// __forceinline Vec3fx select( bool s, const Vec3fx& t, const Vec3fx& f ) { __m128 mask = s ? 
_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())) : _mm_setzero_ps(); return blendv_ps(f.m128, t.m128, mask); } __forceinline Vec3fx select( const Vec3ba& s, const Vec3fx& t, const Vec3fx& f ) { return blendv_ps(f.m128, t.m128, s); } __forceinline Vec3fx lerp(const Vec3fx& v0, const Vec3fx& v1, const float t) { return madd(1.0f-t,v0,t*v1); } __forceinline int maxDim ( const Vec3fx& a ) { const Vec3fx b = abs(a); if (b.x > b.y) { if (b.x > b.z) return 0; else return 2; } else { if (b.y > b.z) return 1; else return 2; } } //////////////////////////////////////////////////////////////////////////////// /// Rounding Functions //////////////////////////////////////////////////////////////////////////////// #if defined (__SSE4_1__) __forceinline Vec3fx trunc( const Vec3fx& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_NEAREST_INT); } __forceinline Vec3fx floor( const Vec3fx& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_NEG_INF ); } __forceinline Vec3fx ceil ( const Vec3fx& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_POS_INF ); } #else __forceinline Vec3fx trunc( const Vec3fx& a ) { return Vec3fx(truncf(a.x),truncf(a.y),truncf(a.z)); } __forceinline Vec3fx floor( const Vec3fx& a ) { return Vec3fx(floorf(a.x),floorf(a.y),floorf(a.z)); } __forceinline Vec3fx ceil ( const Vec3fx& a ) { return Vec3fx(ceilf (a.x),ceilf (a.y),ceilf (a.z)); } #endif //////////////////////////////////////////////////////////////////////////////// /// Output Operators //////////////////////////////////////////////////////////////////////////////// __forceinline embree_ostream operator<<(embree_ostream cout, const Vec3fx& a) { return cout << "(" << a.x << ", " << a.y << ", " << a.z << ")"; } typedef Vec3fx Vec3ff; } #endif level-zero-raytracing-support-1.2.3/rtbuild/math/vec3ia.h000066400000000000000000000226601514453371700234150ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include 
"../sys/alloc.h" #include "emath.h" #if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__) # include "vec3ia_sycl.h" #else #include "../simd/sse.h" namespace embree { //////////////////////////////////////////////////////////////////////////////// /// SSE Vec3ia Type //////////////////////////////////////////////////////////////////////////////// struct __aligned(16) Vec3ia { ALIGNED_STRUCT_(16); union { __m128i m128; struct { int x,y,z; }; }; typedef int Scalar; enum { N = 3 }; //////////////////////////////////////////////////////////////////////////////// /// Constructors, Assignment & Cast Operators //////////////////////////////////////////////////////////////////////////////// __forceinline Vec3ia( ) {} __forceinline Vec3ia( const __m128i a ) : m128(a) {} __forceinline Vec3ia( const Vec3ia& other ) : m128(other.m128) {} __forceinline Vec3ia& operator =(const Vec3ia& other) { m128 = other.m128; return *this; } __forceinline explicit Vec3ia( const int a ) : m128(_mm_set1_epi32(a)) {} __forceinline Vec3ia( const int x, const int y, const int z) : m128(_mm_set_epi32(z, z, y, x)) {} __forceinline explicit Vec3ia( const __m128 a ) : m128(_mm_cvtps_epi32(a)) {} __forceinline operator const __m128i&() const { return m128; } __forceinline operator __m128i&() { return m128; } //////////////////////////////////////////////////////////////////////////////// /// Constants //////////////////////////////////////////////////////////////////////////////// __forceinline Vec3ia( ZeroTy ) : m128(_mm_setzero_si128()) {} __forceinline Vec3ia( OneTy ) : m128(_mm_set1_epi32(1)) {} __forceinline Vec3ia( PosInfTy ) : m128(_mm_set1_epi32(pos_inf)) {} __forceinline Vec3ia( NegInfTy ) : m128(_mm_set1_epi32(neg_inf)) {} //////////////////////////////////////////////////////////////////////////////// /// Array Access //////////////////////////////////////////////////////////////////////////////// __forceinline const int& operator []( const size_t index ) const { assert(index < 
3); return (&x)[index]; } __forceinline int& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; } }; //////////////////////////////////////////////////////////////////////////////// /// Unary Operators //////////////////////////////////////////////////////////////////////////////// __forceinline Vec3ia operator +( const Vec3ia& a ) { return a; } __forceinline Vec3ia operator -( const Vec3ia& a ) { return _mm_sub_epi32(_mm_setzero_si128(), a.m128); } #if defined(__SSSE3__) __forceinline Vec3ia abs ( const Vec3ia& a ) { return _mm_abs_epi32(a.m128); } #endif //////////////////////////////////////////////////////////////////////////////// /// Binary Operators //////////////////////////////////////////////////////////////////////////////// __forceinline Vec3ia operator +( const Vec3ia& a, const Vec3ia& b ) { return _mm_add_epi32(a.m128, b.m128); } __forceinline Vec3ia operator +( const Vec3ia& a, const int b ) { return a+Vec3ia(b); } __forceinline Vec3ia operator +( const int a, const Vec3ia& b ) { return Vec3ia(a)+b; } __forceinline Vec3ia operator -( const Vec3ia& a, const Vec3ia& b ) { return _mm_sub_epi32(a.m128, b.m128); } __forceinline Vec3ia operator -( const Vec3ia& a, const int b ) { return a-Vec3ia(b); } __forceinline Vec3ia operator -( const int a, const Vec3ia& b ) { return Vec3ia(a)-b; } #if defined(__SSE4_1__) __forceinline Vec3ia operator *( const Vec3ia& a, const Vec3ia& b ) { return _mm_mullo_epi32(a.m128, b.m128); } __forceinline Vec3ia operator *( const Vec3ia& a, const int b ) { return a * Vec3ia(b); } __forceinline Vec3ia operator *( const int a, const Vec3ia& b ) { return Vec3ia(a) * b; } #endif __forceinline Vec3ia operator &( const Vec3ia& a, const Vec3ia& b ) { return _mm_and_si128(a.m128, b.m128); } __forceinline Vec3ia operator &( const Vec3ia& a, const int b ) { return a & Vec3ia(b); } __forceinline Vec3ia operator &( const int a, const Vec3ia& b ) { return Vec3ia(a) & b; } __forceinline Vec3ia operator |( const 
Vec3ia& a, const Vec3ia& b ) { return _mm_or_si128(a.m128, b.m128); } __forceinline Vec3ia operator |( const Vec3ia& a, const int b ) { return a | Vec3ia(b); } __forceinline Vec3ia operator |( const int a, const Vec3ia& b ) { return Vec3ia(a) | b; } __forceinline Vec3ia operator ^( const Vec3ia& a, const Vec3ia& b ) { return _mm_xor_si128(a.m128, b.m128); } __forceinline Vec3ia operator ^( const Vec3ia& a, const int b ) { return a ^ Vec3ia(b); } __forceinline Vec3ia operator ^( const int a, const Vec3ia& b ) { return Vec3ia(a) ^ b; } __forceinline Vec3ia operator <<( const Vec3ia& a, const int n ) { return _mm_slli_epi32(a.m128, n); } __forceinline Vec3ia operator >>( const Vec3ia& a, const int n ) { return _mm_srai_epi32(a.m128, n); } __forceinline Vec3ia sll ( const Vec3ia& a, const int b ) { return _mm_slli_epi32(a.m128, b); } __forceinline Vec3ia sra ( const Vec3ia& a, const int b ) { return _mm_srai_epi32(a.m128, b); } __forceinline Vec3ia srl ( const Vec3ia& a, const int b ) { return _mm_srli_epi32(a.m128, b); } //////////////////////////////////////////////////////////////////////////////// /// Assignment Operators //////////////////////////////////////////////////////////////////////////////// __forceinline Vec3ia& operator +=( Vec3ia& a, const Vec3ia& b ) { return a = a + b; } __forceinline Vec3ia& operator +=( Vec3ia& a, const int& b ) { return a = a + b; } __forceinline Vec3ia& operator -=( Vec3ia& a, const Vec3ia& b ) { return a = a - b; } __forceinline Vec3ia& operator -=( Vec3ia& a, const int& b ) { return a = a - b; } #if defined(__SSE4_1__) __forceinline Vec3ia& operator *=( Vec3ia& a, const Vec3ia& b ) { return a = a * b; } __forceinline Vec3ia& operator *=( Vec3ia& a, const int& b ) { return a = a * b; } #endif __forceinline Vec3ia& operator &=( Vec3ia& a, const Vec3ia& b ) { return a = a & b; } __forceinline Vec3ia& operator &=( Vec3ia& a, const int& b ) { return a = a & b; } __forceinline Vec3ia& operator |=( Vec3ia& a, const Vec3ia& b ) { 
return a = a | b; } __forceinline Vec3ia& operator |=( Vec3ia& a, const int& b ) { return a = a | b; } __forceinline Vec3ia& operator <<=( Vec3ia& a, const int& b ) { return a = a << b; } __forceinline Vec3ia& operator >>=( Vec3ia& a, const int& b ) { return a = a >> b; } //////////////////////////////////////////////////////////////////////////////// /// Select //////////////////////////////////////////////////////////////////////////////// __forceinline Vec3ia select( const Vec3ba& m, const Vec3ia& t, const Vec3ia& f ) { #if defined(__SSE4_1__) return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(t), m)); #else return _mm_or_si128(_mm_and_si128(_mm_castps_si128(m), t), _mm_andnot_si128(_mm_castps_si128(m), f)); #endif } //////////////////////////////////////////////////////////////////////////////// /// Reductions //////////////////////////////////////////////////////////////////////////////// __forceinline int reduce_add(const Vec3ia& v) { return v.x+v.y+v.z; } __forceinline int reduce_mul(const Vec3ia& v) { return v.x*v.y*v.z; } __forceinline int reduce_min(const Vec3ia& v) { return min(v.x,v.y,v.z); } __forceinline int reduce_max(const Vec3ia& v) { return max(v.x,v.y,v.z); } //////////////////////////////////////////////////////////////////////////////// /// Comparison Operators //////////////////////////////////////////////////////////////////////////////// __forceinline bool operator ==( const Vec3ia& a, const Vec3ia& b ) { return (_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(a.m128, b.m128))) & 7) == 7; } __forceinline bool operator !=( const Vec3ia& a, const Vec3ia& b ) { return (_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(a.m128, b.m128))) & 7) != 7; } __forceinline bool operator < ( const Vec3ia& a, const Vec3ia& b ) { if (a.x != b.x) return a.x < b.x; if (a.y != b.y) return a.y < b.y; if (a.z != b.z) return a.z < b.z; return false; } __forceinline Vec3ba eq_mask( const Vec3ia& a, const Vec3ia& b ) { return 
_mm_castsi128_ps(_mm_cmpeq_epi32 (a.m128, b.m128)); } __forceinline Vec3ba lt_mask( const Vec3ia& a, const Vec3ia& b ) { return _mm_castsi128_ps(_mm_cmplt_epi32 (a.m128, b.m128)); } __forceinline Vec3ba gt_mask( const Vec3ia& a, const Vec3ia& b ) { return _mm_castsi128_ps(_mm_cmpgt_epi32 (a.m128, b.m128)); } #if defined(__SSE4_1__) __forceinline Vec3ia min( const Vec3ia& a, const Vec3ia& b ) { return _mm_min_epi32(a.m128,b.m128); } __forceinline Vec3ia max( const Vec3ia& a, const Vec3ia& b ) { return _mm_max_epi32(a.m128,b.m128); } #else __forceinline Vec3ia min( const Vec3ia& a, const Vec3ia& b ) { return select(lt_mask(a,b),a,b); } __forceinline Vec3ia max( const Vec3ia& a, const Vec3ia& b ) { return select(gt_mask(a,b),a,b); } #endif //////////////////////////////////////////////////////////////////////////////// /// Output Operators //////////////////////////////////////////////////////////////////////////////// __forceinline embree_ostream operator<<(embree_ostream cout, const Vec3ia& a) { return cout << "(" << a.x << ", " << a.y << ", " << a.z << ")"; } } #endif level-zero-raytracing-support-1.2.3/rtbuild/node_type.h000066400000000000000000000031211514453371700232670ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include #include namespace embree { /* The type of a node. 
*/ enum NodeType : uint8_t { NODE_TYPE_MIXED = 0x0, // identifies a mixed internal node where each child can have a different type NODE_TYPE_INTERNAL = 0x0, // internal BVH node with 6 children NODE_TYPE_INSTANCE = 0x1, // instance leaf NODE_TYPE_PROCEDURAL = 0x3, // procedural leaf NODE_TYPE_QUAD = 0x4, // quad leaf NODE_TYPE_INVALID = 0x7 // indicates invalid node }; /* output operator for NodeType */ inline std::ostream& operator<<(std::ostream& _cout, const NodeType& _type) { #if !defined(__RTRT_GSIM) switch (_type) { case NODE_TYPE_INTERNAL: _cout << "INTERNAL"; break; case NODE_TYPE_INSTANCE: _cout << "INSTANCE"; break; case NODE_TYPE_PROCEDURAL: _cout << "PROCEDURAL"; break; case NODE_TYPE_QUAD: _cout << "QUAD"; break; case NODE_TYPE_INVALID: _cout << "INVALID"; break; default: _cout << "INVALID NODE TYPE"; break; } #endif return _cout; }; /* Sub-type definition for each NodeType */ enum SubType : uint8_t { SUB_TYPE_NONE = 0, /* sub-type for NODE_TYPE_INTERNAL */ SUB_TYPE_INTERNAL6 = 0x00, // Xe+: internal node with 6 children /* Sub-type for NODE_TYPE_QUAD */ SUB_TYPE_QUAD = 0, // Xe+: standard quad leaf (64 bytes) /* Sub-type for NODE_TYPE_PROCEDURAL */ SUB_TYPE_PROCEDURAL = 0, // Xe+: standard procedural leaf }; } level-zero-raytracing-support-1.2.3/rtbuild/qbvh6.cpp000066400000000000000000000206121514453371700226660ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "qbvh6.h" namespace embree { template void computeInternalNodeStatistics(BVHStatistics& stats, QBVH6::Node node, const BBox1f time_range, const float node_bounds_area, const float root_bounds_area) { InternalNode* inner = node.innerNode(); size_t size = 0; for (uint32_t i = 0; i < InternalNode::NUM_CHILDREN; i++) { if (inner->valid(i)) { size++; computeStatistics(stats, inner->child(i), time_range, area(inner->bounds(i)), root_bounds_area, InternalNode::NUM_CHILDREN); } } /* update BVH statistics */ 
stats.internalNode.numNodes++; stats.internalNode.numChildrenUsed += size; stats.internalNode.numChildrenTotal += InternalNode::NUM_CHILDREN; stats.internalNode.nodeSAH += time_range.size() * node_bounds_area / root_bounds_area; stats.internalNode.numBytes += sizeof(InternalNode); } void computeStatistics(BVHStatistics& stats, QBVH6::Node node, const BBox1f time_range, const float node_bounds_area, const float root_bounds_area, uint32_t numChildren) { switch (node.type) { case NODE_TYPE_INSTANCE: { stats.instanceLeaf.numLeaves++; stats.instanceLeaf.numPrimsUsed++; stats.instanceLeaf.numPrimsTotal++; stats.instanceLeaf.leafSAH += time_range.size() * node_bounds_area / root_bounds_area; stats.instanceLeaf.numBytesUsed += sizeof(InstanceLeaf); stats.instanceLeaf.numBytesTotal += sizeof(InstanceLeaf); break; } case NODE_TYPE_QUAD: { bool last = false; stats.quadLeaf.numLeaves++; do { QuadLeaf* quad = node.leafNodeQuad(); node.node += sizeof(QuadLeaf); last = quad->isLast(); stats.quadLeaf.numPrimsUsed += quad->size(); stats.quadLeaf.numPrimsTotal += 2; stats.quadLeaf.numBytesUsed += quad->usedBytes(); stats.quadLeaf.numBytesTotal += sizeof(QuadLeaf); stats.quadLeaf.leafSAH += quad->size() * time_range.size() * node_bounds_area / root_bounds_area; } while (!last); break; } case NODE_TYPE_PROCEDURAL: { /*if (node.leafNodeProcedural()->leafDesc.isProceduralInstance()) // FIXME: for some reason we always to into this case!? 
{ stats.proceduralLeaf.numLeaves++; stats.proceduralLeaf.numPrimsUsed += 1; stats.proceduralLeaf.numPrimsTotal += 1; stats.proceduralLeaf.leafSAH += time_range.size() * node_bounds_area / root_bounds_area; stats.proceduralLeaf.numBytesUsed += sizeof(InstanceLeaf); stats.proceduralLeaf.numBytesTotal += sizeof(InstanceLeaf); } else*/ { bool last = false; uint32_t currPrim = node.cur_prim; stats.proceduralLeaf.numLeaves++; do { ProceduralLeaf* leaf = node.leafNodeProcedural(); last = leaf->isLast(currPrim); if (currPrim == 0) { stats.proceduralLeaf.numBlocks++; stats.proceduralLeaf.numBytesUsed += leaf->usedBytes(); stats.proceduralLeaf.numBytesTotal += sizeof(ProceduralLeaf); } uint32_t primsInBlock = leaf->size(); stats.proceduralLeaf.numPrimsUsed++; stats.proceduralLeaf.numPrimsTotal++; stats.proceduralLeaf.leafSAH += time_range.size() * node_bounds_area / root_bounds_area; if (++currPrim >= primsInBlock) { currPrim = 0; node.node += sizeof(ProceduralLeaf); } } while (!last); } break; } case NODE_TYPE_INTERNAL: { computeInternalNodeStatistics(stats, node, time_range, node_bounds_area, root_bounds_area); break; } default: assert(false); } } BVHStatistics QBVH6::computeStatistics() const { BVHStatistics stats; if (empty()) return stats; embree::computeStatistics(stats,root(),BBox1f(0,1),area(bounds),area(bounds),6); return stats; } template void QBVH6::printInternalNodeStatistics(std::ostream& cout, QBVH6::Node node, uint32_t depth, uint32_t numChildren) { QInternalNode* inner = node.innerNode(); inner->print(cout, depth, false); std::cout << std::endl; for (uint32_t i = 0; i < QInternalNode::NUM_CHILDREN; i++) { if (inner->valid(i)) print(cout, inner->child(i), depth + 1, QInternalNode::NUM_CHILDREN); } cout << tab(depth) << "}" << std::endl; } void QBVH6::print( std::ostream& cout, QBVH6::Node node, uint32_t depth, uint32_t numChildren) { switch (node.type) { case NODE_TYPE_INSTANCE: { node.leafNodeInstance()->print(cout,depth); cout << std::endl; break; } case 
NODE_TYPE_QUAD: { std::cout << tab(depth) << "List {" << std::endl; bool last = false; do { QuadLeaf* quad = node.leafNodeQuad(); node.node += sizeof(QuadLeaf); last = quad->isLast(); quad->print(cout,depth+1); std::cout << std::endl; } while (!last); std::cout << tab(depth) << "}" << std::endl; break; } case NODE_TYPE_PROCEDURAL: { /*if (!node.leafNodeProcedural()->leafDesc.opaqueCullingEnabled()) { InstanceLeaf* leaf = (InstanceLeaf*) node.node; leaf->print(cout,depth+1); std::cout << std::endl; } else*/ { std::cout << tab(depth) << "List {" << std::endl; bool last = false; uint32_t currPrim = node.cur_prim; do { ProceduralLeaf* leaf = node.leafNodeProcedural(); last = leaf->isLast(currPrim); uint32_t primsInBlock = leaf->size(); leaf->print(cout,currPrim,depth+1); std::cout << std::endl; if (++currPrim >= primsInBlock) { currPrim = 0; node.node += sizeof(ProceduralLeaf); } } while (!last); std::cout << tab(depth) << "}" << std::endl; } break; } case NODE_TYPE_INTERNAL: { printInternalNodeStatistics(cout, node, depth, numChildren); break; } default: std::cout << "{ INVALID_NODE }" << std::endl; //assert(false); } } unsigned* getBackPointersData(const QBVH6* base) { // FIXME: should be member function return (unsigned*)(((const char*)base) + 64 * base->backPointerDataStart); } unsigned getNumBackpointers(const QBVH6* base) { // FIXME: should be member function return ((base->backPointerDataEnd - base->backPointerDataStart) * 64) / sizeof(unsigned); } uint64_t getBackpointerChildOffset(const QBVH6* base, unsigned idx) { // FIXME: should be member function return 64 * uint64_t(base->nodeDataStart + idx); } uint64_t getParentFromBackpointerOffset(const QBVH6* base, unsigned idx) { // FIXME: should be member function return 64 * uint64_t(base->nodeDataStart + (getBackPointersData(base)[idx] >> 6)); } void QBVH6::print ( std::ostream& cout ) const { cout << "QBVH @ "<< this <<" header: {\n"; cout << " rootNodeOffset = " << rootNodeOffset << std::endl; cout << " bounds 
= " << bounds << std::endl; cout << " nodeDataStart = " << nodeDataStart << std::endl; cout << " nodeDataCur = " << nodeDataCur << std::endl; cout << " leafDataStart = " << leafDataCur << std::endl; cout << " leafDataCur = " << leafDataCur << std::endl; cout << " proceduralDataStart = " << proceduralDataStart << std::endl; cout << " proceduralDataCur = " << proceduralDataCur << std::endl; cout << " backPointerDataStart = " << backPointerDataStart << std::endl; cout << " backPointerDataEnd = " << backPointerDataEnd << std::endl; cout << " numPrims = " << numPrims << std::endl; cout << "}" << std::endl; if (empty()) return; print(cout,root(),0,6); if (hasBackPointers()) { cout << "backpointers: {\n"; for (unsigned bp = 0; bp < getNumBackpointers(this); ++bp) { cout << " node @ offset " << (void*)getBackpointerChildOffset(this, bp) << " parent = " << (void*)getParentFromBackpointerOffset(this, bp) << ", num children = " << ((getBackPointersData(this)[bp] >> 3) & 0x7) << "\n"; } cout << "}\n"; } } } level-zero-raytracing-support-1.2.3/rtbuild/qbvh6.h000066400000000000000000000201221514453371700223270ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "qnode.h" #include "statistics.h" #include "rtbuild.h" namespace embree { /* The QBVH6 structure defines the bounding volume hierarchy (BVH) that is used by the hardware. It is a BVH with 6-wide branching factor, and quantized bounding boxes. At the leaf level quads (QuadLeaf type), procedural geometries (ProceduralLeaf type), and instances (InstanceLeaf type) can get referenced. 
*/ inline constexpr size_t roundOffsetTo128(size_t offset) { return 2 * ((offset + 127) / 128); } struct QBVH6 { typedef NodeRef Node; typedef InternalNode InternalNode6; static constexpr uint64_t rootNodeOffset = 128; static_assert(sizeof(InternalNode6) == 64, "InternalNode6 must be 64 bytes large"); /* structure used to initialize the memory allocator inside the BVH */ struct SizeEstimate { SizeEstimate () : nodeBytes(0), leafBytes(0), proceduralBytes(0) {} SizeEstimate (size_t nodeBytes, size_t leafBytes, size_t proceduralBytes) : nodeBytes(nodeBytes), leafBytes(leafBytes), proceduralBytes(proceduralBytes) {} size_t bytes() const { return sizeof(QBVH6) + nodeBytes + leafBytes + proceduralBytes; } friend bool operator<= (SizeEstimate a, SizeEstimate b) { if (a.nodeBytes > b.nodeBytes) return false; if (a.leafBytes > b.leafBytes) return false; if (a.proceduralBytes > b.proceduralBytes) return false; return true; } friend SizeEstimate operator+ (const SizeEstimate& a, const SizeEstimate& b) { return SizeEstimate(a.nodeBytes + b.nodeBytes, a.leafBytes + b.leafBytes, a.proceduralBytes + b.proceduralBytes); } /* output operator */ friend inline std::ostream& operator<<(std::ostream& cout, const SizeEstimate& estimate) { cout << "SizeEstimate {" << std::endl; cout << " nodeBytes = " << estimate.nodeBytes << ", " << std::endl; cout << " leafBytes = " << estimate.leafBytes << ", " << std::endl; cout << " proceduralBytes = " << estimate.proceduralBytes << ", " << std::endl; return cout << "}"; } public: size_t nodeBytes; // bytes required to store internal nodes size_t leafBytes; // bytes required to store leaf nodes size_t proceduralBytes; // bytes required to store procedural leaf nodes }; /* Initializes a QBVH6 node with its provided size. The memory for * the QBVH6 structure is overallocated and the allocation size is * provided to the constructor, such that the allocator of the BVH * can get initialized properly. 
*/ QBVH6(SizeEstimate size) : nodeDataStart((uint32_t)roundOffsetTo128(sizeof(QBVH6))), nodeDataCur(nodeDataStart), leafDataStart(nodeDataCur + (uint32_t)(size.nodeBytes / 64)), leafDataCur(leafDataStart), proceduralDataStart(leafDataCur + (uint32_t)(size.leafBytes / 64)), proceduralDataCur(proceduralDataStart), backPointerDataStart(proceduralDataCur + (uint32_t)(size.proceduralBytes/64)), backPointerDataEnd(backPointerDataStart) { assert(size.nodeBytes % 64 == 0); assert(size.leafBytes % 64 == 0); assert(size.proceduralBytes % 64 == 0); assert(size.bytes() <= (64LL << 32)); bounds = embree::empty; } /* Returns the root node of the BVH */ Node root() const { return Node(rootNodeOffset,(uint64_t)this); } /* sets root not offset to point to this specified node */ void setRootNodeOffset(Node node) { assert(node.cur_prim == 0); uint64_t MAYBE_UNUSED rootNodeOffset1 = (uint64_t)node - (uint64_t)this; assert(rootNodeOffset == rootNodeOffset1); } /* check if BVH is empty */ bool empty() const { return root().type == NODE_TYPE_INVALID; } /* pretty printing */ template static void printInternalNodeStatistics(std::ostream& cout, QBVH6::Node node, uint32_t depth, uint32_t numChildren = 6); static void print(std::ostream& cout, QBVH6::Node node, uint32_t depth, uint32_t numChildren=6); void print(std::ostream& cout = std::cout) const; /* output operator */ friend inline std::ostream& operator<<(std::ostream& cout, const QBVH6& qbvh) { qbvh.print(cout); return cout; } /* calculates BVH statistics */ BVHStatistics computeStatistics() const; /* This section implements a simple allocator for BVH data. The BVH data is separated into two section, a section where nodes and leaves in mixed mode are allocated, and a section where only leaves are allocate in fat-leaf mode. 
*/ public: /* allocate data in the node memory section */ char* allocNode(size_t bytes) { assert(bytes % 64 == 0); uint32_t blocks = (uint32_t)bytes / 64; assert(nodeDataCur + blocks <= leafDataStart); char* ptr = (char*)this + 64 * (size_t)nodeDataCur; nodeDataCur += blocks; return ptr; } /* allocate memory in the leaf memory section */ char* allocLeaf(size_t bytes) { assert(bytes % 64 == 0); uint32_t blocks = (uint32_t)bytes / 64; assert(leafDataCur + blocks <= proceduralDataStart); char* ptr = (char*)this + 64 * (size_t)leafDataCur; leafDataCur += blocks; return ptr; } /* allocate memory in procedural leaf memory section */ char* allocProceduralLeaf(size_t bytes) { assert(bytes % 64 == 0); uint32_t blocks = (uint32_t)bytes / 64; assert(proceduralDataCur + blocks <= backPointerDataStart); char* ptr = (char*)this + 64 * (size_t)proceduralDataCur; proceduralDataCur += blocks; return ptr; } /* returns pointer to node address */ char* nodePtr(size_t ofs) { return (char*)this + 64 * size_t(nodeDataStart) + ofs; } /* returns pointer to address for next leaf allocation */ char* leafPtr() { return (char*)this + 64 * (size_t)leafDataCur; } /* returns the total number of bytes of the BVH */ size_t getTotalBytes() const { return 64 * (size_t)backPointerDataEnd; } /* returns number of bytes available for node allocations */ size_t getFreeNodeBytes() const { return 64 * (size_t)(leafDataStart - nodeDataCur); } /* returns number of bytes available for leaf allocations */ size_t getFreeLeafBytes() const { return 64 * (size_t)(proceduralDataStart - leafDataCur); } /* returns number of bytes available for procedural leaf allocations */ size_t getFreeProceduralLeafBytes() const { return 64 * (size_t)(backPointerDataStart - proceduralDataCur); } /* returns the bytes used by allocations */ size_t getUsedBytes() const { return getTotalBytes() - getFreeNodeBytes() - getFreeLeafBytes() - getFreeProceduralLeafBytes(); } bool hasBackPointers() const { return backPointerDataStart < 
backPointerDataEnd; } public: ze_raytracing_accel_format_internal_t rtas_format = ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_1; uint32_t reserved1; BBox3f bounds; // bounding box of the BVH uint32_t nodeDataStart; // first 64 byte block of node data uint32_t nodeDataCur; // next free 64 byte block for node allocations uint32_t leafDataStart; // first 64 byte block of leaf data uint32_t leafDataCur; // next free 64 byte block for leaf allocations uint32_t proceduralDataStart; // first 64 byte block for procedural leaf data uint32_t proceduralDataCur; // next free 64 byte block for procedural leaf allocations uint32_t backPointerDataStart; // first 64 byte block for back pointers uint32_t backPointerDataEnd; // end of back pointer array uint32_t numTimeSegments = 1; uint32_t numPrims = 0; // number of primitives in this BVH uint32_t reserved[12]; uint64_t dispatchGlobalsPtr; }; static_assert(sizeof(QBVH6) == 128, "QBVH6 must be 128 bytes large"); } level-zero-raytracing-support-1.2.3/rtbuild/qbvh6_builder_sah.h000066400000000000000000001541421514453371700247020ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "qbvh6.h" #include "statistics.h" #include "quadifier.h" #include "rtbuild.h" #include #if defined(ZE_RAYTRACING) #include "builders/priminfo.h" #include "builders/primrefgen_presplit.h" #include "builders/heuristic_binning_array_aligned.h" #include "algorithms/parallel_for_for_prefix_sum.h" #else #include "../../builders/priminfo.h" #include "../../builders/primrefgen_presplit.h" #include "../../builders/heuristic_binning_array_aligned.h" #include "../../../common/algorithms/parallel_for_for_prefix_sum.h" #endif namespace embree { namespace isa { struct QBVH6BuilderSAH { static const size_t BVH_WIDTH = QBVH6::InternalNode6::NUM_CHILDREN; static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree of we are that many levels before the maximum tree depth /* the type of primitive 
that is referenced */ enum Type { TRIANGLE=0, QUAD=1, PROCEDURAL=2, INSTANCE=3, UNKNOWN=4, NUM_TYPES=5 }; /* check when we use spatial splits */ static bool useSpatialSplits(ze_rtas_builder_build_quality_hint_exp_t build_quality, ze_rtas_builder_build_op_exp_flags_t build_flags) { return build_quality == ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXP_HIGH && !(build_flags & ZE_RTAS_BUILDER_BUILD_OP_EXP_FLAG_NO_DUPLICATE_ANYHIT_INVOCATION); } /* BVH allocator */ struct Allocator { Allocator() {} void init(char* data_in, size_t bytes_in) { ptr = data_in; end = bytes_in; cur.store(0); } size_t bytesAllocated() const { return cur.load(); } __forceinline void* malloc(size_t bytes, size_t align = 16) { assert(align <= 128); //ZE_RAYTRACING_ACCELERATION_STRUCTURE_ALIGNMENT_EXT if (unlikely(cur.load() >= end)) return nullptr; const size_t extra = (align - cur) & (align-1); const size_t bytes_align = bytes + extra; const size_t cur_old = cur.fetch_add(bytes_align); const size_t cur_new = cur_old + bytes_align; if (unlikely(cur_new >= end)) return nullptr; return &ptr[cur_old + extra]; } private: char* ptr = nullptr; // data buffer pointer size_t end = 0; // size of data buffer in bytes __aligned(64) std::atomic cur = 0; // current pointer to allocate next data block from }; /* triangle data for leaf creation */ struct Triangle { Triangle () : gmask(0) {} Triangle (uint32_t i0, uint32_t i1, uint32_t i2, Vec3f p0, Vec3f p1, Vec3f p2, GeometryFlags gflags, uint8_t gmask) : i0(i0), i1(i1), i2(i2), p0(p0), p1(p1), p2(p2), gflags(gflags), gmask(gmask) {} __forceinline bool valid() const { return gmask != 0; } uint32_t i0,i1,i2; Vec3f p0,p1,p2; GeometryFlags gflags; uint8_t gmask; }; /* quad data for leaf creation */ struct Quad { Quad (Vec3f p0, Vec3f p1, Vec3f p2, Vec3f p3, GeometryFlags gflags, uint8_t gmask) : p0(p0), p1(p1), p2(p2), p3(p3), gflags(gflags), gmask(gmask) {} Vec3f p0,p1,p2,p3; GeometryFlags gflags; uint8_t gmask; }; /* procedural data for leaf creation */ struct 
Procedural { Procedural (uint8_t gmask) : gmask(gmask) {} PrimLeafDesc desc(uint32_t geomID) const { return PrimLeafDesc(0,geomID,GeometryFlags::NONE,gmask,PrimLeafDesc::TYPE_OPACITY_CULLING_ENABLED); } uint8_t gmask; }; /* instance data for leaf creation */ struct Instance { Instance (AffineSpace3f local2world, void* accel, uint8_t imask, uint32_t instanceUserID) : local2world(local2world), accel(accel), imask(imask), instanceUserID(instanceUserID) {} AffineSpace3f local2world; void* accel; uint8_t imask; uint32_t instanceUserID; }; struct Stats { size_t numTriangles = 0; size_t numQuads = 0; size_t numProcedurals = 0; size_t numInstances = 0; /* assume some reasonable quadification rate */ void estimate_quadification() { numQuads += (numTriangles+1)/2 + numTriangles/8; numTriangles = 0; } void estimate_presplits( double factor ) { numTriangles = max(numTriangles, size_t(numTriangles*factor)); numQuads = max(numQuads , size_t(numQuads*factor)); numInstances = max(numInstances, size_t(numInstances*factor)); } size_t size() { return numTriangles+numQuads+numProcedurals+numInstances; } size_t expected_bvh_bytes() { const size_t blocks = (size()+5)/6; const size_t expected_bytes = 128 + 64*size_t(1+1.5*blocks) + numTriangles*64 + numQuads*64 + numProcedurals*8 + numInstances*128; const size_t bytes = 2*4096 + size_t(1.1*expected_bytes); // FIXME: FastAllocator wastes memory and always allocates 4kB per thread return (bytes+127)&-128; } size_t worst_case_bvh_bytes() { const size_t numPrimitives = size(); const size_t blocks = (numPrimitives+5)/6; const size_t worst_case_bytes = 128 + 64*(1+blocks + numPrimitives) + numTriangles*64 + numQuads*64 + numProcedurals*64 + numInstances*128; const size_t bytes = 2*4096 + size_t(1.1*worst_case_bytes); // FIXME: FastAllocator wastes memory and always allocates 4kB per thread return (bytes+127)&-128; } size_t scratch_space_bytes() { return size()*sizeof(PrimRef)+64; // 64 to align to 64 bytes } }; /*! 
settings for SAH builder */ struct Settings { public: size_t maxDepth = 27; //!< maximum depth of BVH to build size_t sahBlockSize = 6; //!< blocksize for SAH heuristic size_t leafSize[NUM_TYPES] = { 9,9,6,6,6 }; //!< target size of a leaf size_t typeSplitSize = 128; //!< number of primitives when performing type splitting }; /*! recursive state of builder */ struct BuildRecord { public: __forceinline BuildRecord () {} __forceinline BuildRecord (size_t depth, const PrimInfoRange& prims, Type type) : depth(depth), prims(prims), type(type) {} __forceinline BBox3fa bounds() const { return prims.geomBounds; } __forceinline friend bool operator< (const BuildRecord& a, const BuildRecord& b) { return a.prims.size() < b.prims.size(); } __forceinline friend bool operator> (const BuildRecord& a, const BuildRecord& b) { return a.prims.size() > b.prims.size(); } __forceinline size_t begin() const { return prims.begin(); } __forceinline size_t end () const { return prims.end(); } __forceinline size_t size () const { return prims.size(); } __forceinline bool equalType() const { return type != UNKNOWN; } friend inline std::ostream& operator<<(std::ostream& cout, const BuildRecord& r) { return cout << "BuildRecord { depth = " << r.depth << ", pinfo = " << r.prims << ", type = " << r.type << " }"; } public: size_t depth; //!< Depth of the root of this subtree. PrimInfoRange prims; //!< The list of primitives. 
Type type; //!< shared type when type of primitives are equal otherwise UNKNOWN }; struct PrimRange { PrimRange () : block_delta(0), cur_prim(0) {} PrimRange (uint8_t block_delta, uint8_t start_prim = 0) : block_delta(block_delta), cur_prim(start_prim) { assert(block_delta < 4); assert(start_prim < 16); } friend std::ostream& operator<<(std::ostream& cout,const PrimRange& range) { return cout << "PrimRange { " << (int)range.block_delta << ", " << (int)range.cur_prim << " }"; } public: uint8_t block_delta; uint8_t cur_prim; }; struct ReductionTy { ReductionTy() : node(nullptr) {} ReductionTy (void* node, NodeType type, uint8_t nodeMask, PrimRange primRange) : node((char*)node), type(type), nodeMask(nodeMask), primRange(primRange) {} inline bool valid() { return node != nullptr; } public: char* node; NodeType type; uint8_t nodeMask; PrimRange primRange; }; class ProceduralLeafBuilder { public: ProceduralLeafBuilder (char* data, size_t numBlocks) : data(data), numBlocks(numBlocks), prevBlockID(0), currBlockID(0), currProcedural(nullptr) {} ProceduralLeaf* getCurProcedural() { if (!currProcedural) { assert(numBlocks); currProcedural = new (data) ProceduralLeaf(); data += sizeof(ProceduralLeaf); numBlocks--; } return currProcedural; } PrimRange addProcedural(uint32_t geomID, uint32_t primID, const Procedural* procedural, bool last) { assert(currProcedural); if (!currProcedural->add(procedural->desc(geomID),primID,last)) { assert(numBlocks); currProcedural = (ProceduralLeaf*) data; data += sizeof(ProceduralLeaf); numBlocks--; new (currProcedural) ProceduralLeaf(procedural->desc(geomID),primID,last); currBlockID+=1; } uint32_t blockDelta = currBlockID - prevBlockID; uint32_t currPrim = (uint32_t)currProcedural->size() - 1; prevBlockID = currBlockID; return PrimRange(blockDelta,currPrim); } protected: char* data; size_t numBlocks; uint32_t prevBlockID; uint32_t currBlockID; ProceduralLeaf* currProcedural; }; template class BuilderT { public: static const size_t BINS = 32; 
typedef HeuristicArrayBinningSAH CentroidBinner; BuilderT (Device* device, const getSizeFunc& getSize, const getTypeFunc& getType, const createPrimRefArrayFunc& createPrimRefArray, const getTriangleFunc& getTriangle, const getTriangleIndicesFunc& getTriangleIndices, const getQuadFunc& getQuad, const getProceduralFunc& getProcedural, const getInstanceFunc& getInstance, void* scratch_ptr, size_t scratch_bytes, ze_rtas_format_exp_t rtas_format, ze_rtas_builder_build_quality_hint_exp_t build_quality, ze_rtas_builder_build_op_exp_flags_t build_flags, bool verbose) : getSize(getSize), getType(getType), createPrimRefArray(createPrimRefArray), getTriangle(getTriangle), getTriangleIndices(getTriangleIndices), getQuad(getQuad), getProcedural(getProcedural), getInstance(getInstance), prims(scratch_ptr,scratch_bytes), rtas_format((ze_raytracing_accel_format_internal_t)rtas_format), build_quality(build_quality), build_flags(build_flags), verbose(verbose) {} ReductionTy setInternalNode(char* curAddr, size_t curBytes, NodeType nodeTy, char* childAddr, BuildRecord children[BVH_WIDTH], ReductionTy values[BVH_WIDTH], size_t numChildren) { assert(curBytes >= sizeof(QBVH6::InternalNode6)); assert(numChildren <= QBVH6::InternalNode6::NUM_CHILDREN); BBox3f bounds = empty; for (size_t i=0; isetChildOffset(childAddr); uint8_t nodeMask = 0; for (uint32_t i = 0; i < numChildren; i++) { qnode->setChild(i,children[i].bounds(),values[i].type,values[i].primRange.block_delta); nodeMask |= values[i].nodeMask; } qnode->nodeMask = nodeMask; return ReductionTy(curAddr, NODE_TYPE_INTERNAL, nodeMask, PrimRange(curBytes/64)); } ReductionTy setNode(char* curAddr, size_t curBytes, NodeType nodeTy, char* childAddr, BuildRecord children[BVH_WIDTH], ReductionTy values[BVH_WIDTH], size_t numChildren) { return setInternalNode(curAddr,curBytes,nodeTy,childAddr,children,values,numChildren); } QuadLeaf getTriangleInternal(unsigned int geomID, unsigned int primID) { QBVH6BuilderSAH::Triangle tri = 
getTriangle(geomID,primID); const Vec3f p0 = tri.p0; const Vec3f p1 = tri.p1; const Vec3f p2 = tri.p2; Vec3f p3 = p2; uint8_t lb0 = 0,lb1 = 0,lb2 = 0; uint16_t second = quadification[geomID][primID]; /* handle paired triangle */ if (second) { QBVH6BuilderSAH::Triangle tri1 = getTriangle(geomID,primID+second); assert(tri.gflags == tri1.gflags); assert(tri.gmask == tri1.gmask ); bool pair MAYBE_UNUSED = pair_triangles(Vec3(tri.i0,tri.i1,tri.i2),Vec3(tri1.i0,tri1.i1,tri1.i2),lb0,lb1,lb2); assert(pair); if (lb0 == 3) p3 = tri1.p0; if (lb1 == 3) p3 = tri1.p1; if (lb2 == 3) p3 = tri1.p2; } return QuadLeaf( p0,p1,p2,p3, lb0,lb1,lb2, 0, geomID, primID, primID+second, tri.gflags, tri.gmask, false ); }; QuadLeaf createQuadLeaf(Type ty, const PrimRef& prim) { const unsigned int geomID = prim.geomID(); const unsigned int primID = prim.primID(); if (ty == TRIANGLE) return getTriangleInternal(geomID, primID); else { assert(ty == QUAD); const Quad quad = getQuad(geomID,primID); return QuadLeaf(quad.p0,quad.p1,quad.p3,quad.p2, 3,2,1, 0, geomID, primID, primID, quad.gflags, quad.gmask, false ); } } const ReductionTy createQuads(Type ty, const BuildRecord& curRecord, char* curAddr_) { QuadLeaf* curAddr = (QuadLeaf*) curAddr_; uint8_t nodeMask = 0; for (size_t i = curRecord.begin(); i < curRecord.end(); i++, curAddr++) { *curAddr = createQuadLeaf(ty,prims[i]); curAddr->last = (i+1) == curRecord.end(); nodeMask |= curAddr->leafDesc.geomMask; } return ReductionTy(curAddr, NODE_TYPE_QUAD, nodeMask, PrimRange(curRecord.size()*sizeof(QuadLeaf)/64)); } const ReductionTy createFatQuadLeaf(Type ty, const BuildRecord& curRecord, char* curAddr, size_t curBytes, BuildRecord children[BVH_WIDTH], size_t numChildren) { /*! 
allocate data for all children */ char* childData = (char*) allocator.malloc(curRecord.prims.size()*sizeof(QuadLeaf), 64); if (!childData) return ReductionTy(); /* create each child */ ReductionTy values[BVH_WIDTH]; for (size_t i=0, j=0; isetChildOffset(first_procedural + ranges[0].block_delta); qnode->nodeMask = nodeMask; ranges[0].block_delta = 0; for (size_t i = curRecord.begin(), j=0; i < curRecord.end(); i++, j++) qnode->setChild(j,prims[i].bounds(),NODE_TYPE_PROCEDURAL,ranges[j+1].block_delta,ranges[j].cur_prim); return ReductionTy(curAddr, NODE_TYPE_INTERNAL, nodeMask, PrimRange(curBytes/64)); } template const ReductionTy createInstances(const BuildRecord& curRecord, char* curAddr, size_t curBytes) { uint32_t numPrimitives = curRecord.size(); assert(numPrimitives <= QBVH6::InternalNode6::NUM_CHILDREN); /* allocate data for all children */ InstanceLeaf* childData = (InstanceLeaf*) allocator.malloc(numPrimitives*sizeof(InstanceLeaf), 64); if (!childData) return ReductionTy(); QBVH6::InternalNode6* qnode = new (curAddr) QBVH6::InternalNode6(curRecord.bounds(),NODE_TYPE_INSTANCE); qnode->setChildOffset(childData); uint8_t nodeMask = 0; for (size_t i=curRecord.begin(), c=0; i(instance.accel)->root(); root += 64*rootOfs; // goto sub-BVH new (&childData[c]) InstanceLeaf(instance.local2world,root,geomID,instance.instanceUserID,instance.imask); qnode->setChild(c,prims[i].bounds(),NODE_TYPE_INSTANCE,sizeof(InstanceLeaf)/64,0); nodeMask |= instance.imask; } qnode->nodeMask = nodeMask; return ReductionTy(curAddr, NODE_TYPE_INTERNAL, nodeMask, PrimRange(curBytes/64)); } /* finds the index of the child with largest surface area */ int findChildWithLargestArea(BuildRecord children[BVH_WIDTH], size_t numChildren, size_t leafThreshold) { /*! 
find best child to split */ float bestArea = neg_inf; int bestChild = -1; for (uint32_t i=0; i<(uint32_t)numChildren; i++) { /* ignore leaves as they cannot get split */ if (children[i].prims.size() <= leafThreshold) continue; /* find child with largest surface area */ const float area = halfArea(children[i].prims.geomBounds); if (area > bestArea) { bestArea = area; bestChild = i; } } return bestChild; } /* finds the index of the child with most primitives */ int findChildWithMostPrimitives(BuildRecord children[BVH_WIDTH], size_t numChildren, size_t leafThreshold) { /* find best child with largest size */ size_t bestSize = 0; int bestChild = -1; for (uint32_t i=0; i<(uint32_t)numChildren; i++) { /* ignore leaves as they cannot get split */ if (children[i].prims.size() <= leafThreshold) continue; /* remember child with largest size */ if (children[i].prims.size() > bestSize) { bestSize = children[i].size(); bestChild = i; } } return bestChild; } /* finds the index of the child with most primitives */ int findChildWithNonEqualTypes(BuildRecord children[BVH_WIDTH], size_t numChildren) { for (uint32_t i=0; i<(uint32_t)numChildren; i++) if (!children[i].equalType()) return i; return -1; } void SAHSplit(size_t depth, size_t sahBlockSize, int bestChild, BuildRecord children[BVH_WIDTH], size_t& numChildren) { PrimInfoRange linfo, rinfo; BuildRecord brecord = children[bestChild]; /* first perform centroid binning */ CentroidBinner centroid_binner(prims.data()); CentroidBinner::Split bestSplit = centroid_binner.find_block_size(brecord.prims,sahBlockSize); /* now split the primitive list */ if (bestSplit.valid()) centroid_binner.split(bestSplit,brecord.prims,linfo,rinfo); /* the above techniques may fail, and we fall back to some brute force split in the middle */ else centroid_binner.splitFallback(brecord.prims,linfo,rinfo); children[bestChild ] = BuildRecord(depth+1, linfo, brecord.type); children[numChildren] = BuildRecord(depth+1, rinfo, brecord.type); numChildren++; } 
void TypeSplit(size_t depth, int bestChild, BuildRecord children[BVH_WIDTH], size_t& numChildren) { BuildRecord brecord = children[bestChild]; PrimInfoRange linfo, rinfo; auto type = getType(prims[brecord.prims.begin()].geomID()); performTypeSplit(getType,type,prims.data(),brecord.prims.get_range(),linfo,rinfo); for (size_t i=linfo.begin(); i cfg.maxDepth) throw std::runtime_error("BVH too deep"); /* there should be at least one primitive and not too many */ assert(curRecord.size() > 0); assert(curRecord.size() <= cfg.leafSize[curRecord.type]); /* all primitives have to have the same type */ Type ty = getType(prims[curRecord.begin()].geomID()); for (size_t i=curRecord.begin(); i 3) { children[0] = curRecord; numChildren = 1; /*! perform fallback splits until node is full */ while (numChildren < BVH_WIDTH) { const int bestChild = findChildWithMostPrimitives(children,numChildren,1); if (bestChild == -1) break; FallbackSplit(curRecord.depth,bestChild,children,numChildren); } } } /* sort build records for faster shadow ray traversal */ std::sort(children,children+numChildren, [](const BuildRecord& a,const BuildRecord& b) { return area(a.prims.geomBounds) > area(b.prims.geomBounds); }); /* create leaf of proper type */ if (ty == TRIANGLE || ty == QUAD) return createFatQuadLeaf(ty, curRecord, curAddr, curBytes, children, numChildren); else if (ty == PROCEDURAL) return createProcedurals(curRecord,curAddr,curBytes); else if (ty == INSTANCE) { if (rtas_format == ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_1) return createInstances(curRecord,curAddr,curBytes); else if (rtas_format == ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_2) return createInstances(curRecord,curAddr,curBytes); } assert(false); return ReductionTy(); } const ReductionTy createLargeLeaf(const BuildRecord& curRecord, char* curAddr, size_t curBytes) { /* this should never occur but is a fatal error */ if (curRecord.depth > cfg.maxDepth) throw std::runtime_error("BVH too deep"); /* all primitives have to have the same type */ 
Type ty MAYBE_UNUSED = getType(prims[curRecord.begin()].geomID()); for (size_t i=curRecord.begin(); i= cfg.maxDepth; bool performTypeSplit = !curRecord.equalType() && (createLeaf || curRecord.size() <= cfg.typeSplitSize); /* check if types are really not equal when we attempt to split by type */ if (performTypeSplit) { /* check if types are already equal */ bool equalTy = true; Type type = getType(prims[curRecord.begin()].geomID()); for (size_t i=curRecord.begin()+1; i()); /*! allocate data for all children */ size_t childrenBytes = numChildren*sizeof(QBVH6::InternalNode6); char* childBase = (char*) allocator.malloc(childrenBytes, 64); if (!childBase) return ReductionTy(); /* spawn tasks */ if (curRecord.size() > 1024) // cfg.singleThreadThreshold { std::atomic success = true; parallel_for(size_t(0), numChildren, [&] (const range& r) { if (!success) return; for (size_t i=r.begin(); i& r, size_t k, unsigned int geomID) { PrimInfo pinfo(empty); for (size_t j=r.begin(); j(prim.bounds(),dim,pos,v,left,right); if (pair != QUADIFIER_TRIANGLE) { const Triangle tri1 = getTriangle(geomID,primID+pair); const Vec3fa v[4] = { tri1.p0, tri1.p1, tri1.p2, tri1.p0 }; BBox3fa left1, right1; splitPolygon<3>(prim.bounds(),dim,pos,v,left1,right1); left.extend(left1); right.extend(right1); } left_o = PrimRef(left , geomID, primID); right_o = PrimRef(right, geomID, primID); } void splitQuad(const PrimRef& prim, const size_t dim, const float pos, PrimRef& left_o, PrimRef& right_o) const { const uint32_t geomID = prim.geomID(); const uint32_t primID = prim.primID(); const Quad quad = getQuad(geomID,primID); const Vec3fa v[5] = { quad.p0, quad.p1, quad.p2, quad.p3, quad.p0 }; splitPolygon<4>(prim,dim,pos,v,left_o,right_o); } void splitTriangleOrQuad(const PrimRef& prim, const size_t dim, const float pos, PrimRef& left_o, PrimRef& right_o) const { switch (getType(prim.geomID())) { case TRIANGLE: splitTrianglePair(prim,dim,pos,left_o,right_o); break; case QUAD : splitQuad 
(prim,dim,pos,left_o,right_o); break; default: assert(false); break; } } void openInstance(const PrimRef& prim, const unsigned int splitprims, PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE], unsigned int& numSubPrims) { struct Item { QBVH6::InternalNode6* node; float priority; Item () {} Item (QBVH6::InternalNode6* node) : node(node), priority(halfArea(node->bounds())) { /* fat leaves cannot get opened */ if (node->isFatLeaf()) priority = 0.0f; } inline bool operator< ( const Item& other) const { return priority < other.priority; } }; const uint32_t targetSubPrims = splitprims; const uint32_t geomID = prim.geomID(); const uint32_t primID MAYBE_UNUSED = prim.primID(); assert(primID == 0); // has to be zero as we encode root offset here const Instance instance = getInstance(geomID,0); QBVH6::InternalNode6* root = static_cast(instance.accel)->root().innerNode(); darray_t heap; heap.push_back(root); while (heap.size() + (QBVH6::InternalNode6::NUM_CHILDREN-1) <= MAX_PRESPLITS_PER_PRIMITIVE) { /* terminate when budget exceeded */ if (heap.size() >= targetSubPrims) break; /* get top heap element */ std::pop_heap(heap.begin(), heap.end()); auto top = heap.back(); /* if that happens there are only leaf nodes left that cannot get opened */ if (top.priority == 0.0f) break; heap.pop_back(); /* add all children to the heap */ for (uint32_t i=0; ivalid(i)) continue; heap.push_back(top.node->child(i).template innerNode()); std::push_heap(heap.begin(), heap.end()); } } /* create primrefs */ for (size_t i=0; ibounds()); int64_t ofs = ((int64_t)node-(int64_t)root)/64; assert(ofs >= INT_MIN && ofs <= INT_MAX); subPrims[numSubPrims++] = PrimRef(bounds,geomID,(int32_t)ofs); } } float primitiveAreaTrianglePair(const PrimRef& prim) { const uint32_t geomID = prim.geomID(); const uint32_t primID = prim.primID(); const uint16_t pair = quadification[geomID][primID]; assert(pair != QUADIFIER_PAIRED); const Triangle tri0 = getTriangle(geomID,primID); float A = 
areaProjectedTriangle(tri0.p0,tri0.p1,tri0.p2); if (pair == QUADIFIER_TRIANGLE) return A; const Triangle tri1 = getTriangle(geomID,primID+pair); A += areaProjectedTriangle(tri1.p0,tri1.p1,tri1.p2); return A; } float primitiveAreaQuad(const PrimRef& prim) { const uint32_t geomID = prim.geomID(); const uint32_t primID = prim.primID(); const Quad quad = getQuad(geomID,primID); const float A0 = areaProjectedTriangle(quad.p0,quad.p1,quad.p3); const float A1 = areaProjectedTriangle(quad.p2,quad.p3,quad.p1); return A0+A1; } float primitiveAreaInstance(const PrimRef& prim) { return halfArea(prim.bounds()); } float primitiveArea(const PrimRef& prim) { switch (getType(prim.geomID())) { case TRIANGLE: return primitiveAreaTrianglePair(prim); case QUAD : return primitiveAreaQuad(prim); case INSTANCE: return primitiveAreaInstance(prim); default : return 0.0f; } } ReductionTy build(uint32_t numGeometries, PrimInfo& pinfo_o, char* root) { double t1 = verbose ? getSeconds() : 0.0; /* quadify all triangles */ ParallelForForPrefixSumState pstate; pstate.init(numGeometries,getSize,size_t(1024)); PrimInfo pinfo = parallel_for_for_prefix_sum0_( pstate, size_t(1), getSize, PrimInfo(empty), [&](size_t geomID, const range& r, size_t k) -> PrimInfo { if (getType(geomID) == QBVH6BuilderSAH::TRIANGLE) return PrimInfo(pair_triangles(geomID,(QuadifierType*) quadification[geomID].data(), r.begin(), r.end(), getTriangleIndices)); else return PrimInfo(r.size()); }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); double t2 = verbose ? 
getSeconds() : 0.0; if (verbose) std::cout << "quadification: " << std::setw(10) << (t2-t1)*1000.0 << "ms, " << std::endl; //<< std::setw(10) << 1E-6*double(numTriangles)/(t2-t1) << " Mtris/s" << std::endl; size_t numPrimitives = pinfo.size(); /* first try */ //pstate.init(numGeometries,getSize,size_t(1024)); pinfo = parallel_for_for_prefix_sum1_( pstate, size_t(1), getSize, PrimInfo(empty), [&](size_t geomID, const range& r, size_t k, const PrimInfo& base) -> PrimInfo { if (getType(geomID) == QBVH6BuilderSAH::TRIANGLE) return createTrianglePairPrimRefArray(prims.data(),r,base.size(),(unsigned)geomID); else return createPrimRefArray(prims,BBox1f(0,1),r,base.size(),(unsigned)geomID); }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); double t3 = verbose ? getSeconds() : 0.0; if (verbose) std::cout << "primrefgen : " << std::setw(10) << (t3-t2)*1000.0 << "ms, " << std::setw(10) << 1E-6*double(numPrimitives)/(t3-t2) << " Mprims/s" << std::endl; /* if we need to filter out geometry, run again */ if (pinfo.size() != numPrimitives) { numPrimitives = pinfo.size(); pinfo = parallel_for_for_prefix_sum1_( pstate, size_t(1), getSize, PrimInfo(empty), [&](size_t geomID, const range& r, size_t k, const PrimInfo& base) -> PrimInfo { if (getType(geomID) == QBVH6BuilderSAH::TRIANGLE) { return createTrianglePairPrimRefArray(prims.data(),r,base.size(),(unsigned)geomID); } else return createPrimRefArray(prims,BBox1f(0,1),r,base.size(),(unsigned)geomID); }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); } assert(pinfo.size() == numPrimitives); double t4 = verbose ? 
getSeconds() : 0.0; if (verbose) std::cout << "primrefgen2 : " << std::setw(10) << (t4-t3)*1000.0 << "ms, " << std::setw(10) << 1E-6*double(numPrimitives)/(t4-t3) << " Mprims/s" << std::endl; /* perform pre-splitting */ if (useSpatialSplits(build_quality,build_flags) && numPrimitives) { auto splitter = [this] (const PrimRef& prim, const size_t dim, const float pos, PrimRef& left_o, PrimRef& right_o) { splitTriangleOrQuad(prim,dim,pos,left_o,right_o); }; auto splitter1 = [&] (const PrimRef& prim, const unsigned int splitprims, const SplittingGrid& grid, PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE], unsigned int& numSubPrims) { if (getType(prim.geomID()) == QBVH6BuilderSAH::INSTANCE) { openInstance(prim,splitprims,subPrims,numSubPrims); } else { splitPrimitive(splitter,prim,splitprims,grid,subPrims,numSubPrims); } }; auto primitiveArea1 = [this] (const PrimRef& prim) -> float { return primitiveArea(prim); }; pinfo = createPrimRefArray_presplit(numPrimitives, prims, pinfo, splitter1, primitiveArea1); } /* exit early if scene is empty */ if (pinfo.size() == 0) { pinfo_o = pinfo; return createEmptyNode(root); } /* build hierarchy */ BuildRecord record(1,pinfo,UNKNOWN); ReductionTy r = createInternalNode(record,root,sizeof(QBVH6::InternalNode6)); double t5 = verbose ? getSeconds() : 0.0; if (verbose) std::cout << "bvh_build : " << std::setw(10) << (t5-t4)*1000.0 << "ms, " << std::setw(10) << 1E-6*double(numPrimitives)/(t5-t4) << " Mprims/s" << std::endl; pinfo_o = pinfo; return r; } bool build(size_t numGeometries, char* accel, size_t bytes, BBox3f* boundsOut, size_t* accelBufferBytesOut, void* dispatchGlobalsPtr) { double t0 = verbose ? 
getSeconds() : 0.0; Stats stats; size_t numPrimitives = 0; quadification.resize(numGeometries); for (size_t geomID=0; geomIDrtas_format = rtas_format; qbvh->numPrims = 0; //numPrimitives; uint64_t rootNodeOffset = QBVH6::Node((char*)(r.node - (char*)qbvh), r.type, r.primRange.cur_prim); assert(rootNodeOffset == QBVH6::rootNodeOffset); _unused(rootNodeOffset); qbvh->bounds = bounds; qbvh->numTimeSegments = 1; qbvh->dispatchGlobalsPtr = (uint64_t) dispatchGlobalsPtr; #if 0 BVHStatistics stats = qbvh->computeStatistics(); stats.print(std::cout); stats.print_raw(std::cout); qbvh->print(); /*std::cout << "#define bvh_bytes " << bytes << std::endl; std::cout << "const unsigned char bvh_data[bvh_bytes] = {"; for (size_t i=0; i prims; Allocator allocator; std::vector> quadification; ze_raytracing_accel_format_internal_t rtas_format; ze_rtas_builder_build_quality_hint_exp_t build_quality; ze_rtas_builder_build_op_exp_flags_t build_flags; bool verbose; }; template static void estimateSize(size_t numGeometries, const getSizeFunc& getSize, const getTypeFunc& getType, ze_rtas_format_exp_t rtas_format, ze_rtas_builder_build_quality_hint_exp_t build_quality, ze_rtas_builder_build_op_exp_flags_t build_flags, size_t& expectedBytes, size_t& worstCaseBytes, size_t& scratchBytes) { Stats stats; for (size_t geomID=0; geomID static bool build(size_t numGeometries, Device* device, const getSizeFunc& getSize, const getTypeFunc& getType, const createPrimRefArrayFunc& createPrimRefArray, const getTriangleFunc& getTriangle, const getTriangleIndicesFunc& getTriangleIndices, const getQuadFunc& getQuad, const getProceduralFunc& getProcedural, const getInstanceFunc& getInstance, char* accel_ptr, size_t accel_bytes, void* scratch_ptr, size_t scratch_bytes, BBox3f* boundsOut, size_t* accelBufferBytesOut, ze_rtas_format_exp_t rtas_format, ze_rtas_builder_build_quality_hint_exp_t build_quality, ze_rtas_builder_build_op_exp_flags_t build_flags, bool verbose, void* dispatchGlobalsPtr) { /* align 
scratch buffer to 64 bytes */ bool scratchAligned = std::align(64,0,scratch_ptr,scratch_bytes); if (!scratchAligned) throw std::runtime_error("scratch buffer cannot get aligned"); BuilderT builder (device, getSize, getType, createPrimRefArray, getTriangle, getTriangleIndices, getQuad, getProcedural, getInstance, scratch_ptr, scratch_bytes, rtas_format, build_quality, build_flags, verbose); return builder.build(numGeometries, accel_ptr, accel_bytes, boundsOut, accelBufferBytesOut, dispatchGlobalsPtr); } }; } } level-zero-raytracing-support-1.2.3/rtbuild/qnode.h000066400000000000000000000461441514453371700224230ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include #include #include "leaf.h" #if defined(__INTEL_LLVM_COMPILER) && defined(WIN32) inline float embree_frexp(float value, int* exp) { // using the Intel(R) oneAPI DPC++/C++ Compiler with -no-intel-libs results // in an unresolved external symbol "__imp_frexp" error and therefore we // provide a the manual implemetation referenced here // https://en.cppreference.com/w/c/numeric/math/frexp in this case static_assert(FLT_RADIX == 2, "custom implementation of frexp only works for base 2 floating point representations"); *exp = (value == 0) ? 0 : (int)(1 + logb(value)); return scalbn(value, -(*exp)); } #endif namespace embree { /* The NodeRef structure references a node of the BVH. It stores the * pointer to that node as well as the node's type. If a leaf node * is referenced the current primitive to intersect is also * stored. 
*/ struct NodeRef { NodeRef () : node(nullptr), type(NODE_TYPE_INVALID), cur_prim(0) {} NodeRef (void* node, NodeType type, uint8_t cur_prim) : node((char*)node), type(type), cur_prim(cur_prim) { assert(cur_prim < 16); } /* decode from 64 bit encoding used in MemRay and Instances */ NodeRef (uint64_t nodePtr, uint64_t offset = 0) { node = (char*) (nodePtr & ~(uint64_t)0xF) + offset; //type = NODE_TYPE_INTERNAL; // we can only reference internal nodes inside ray and instances type = (NodeType) (nodePtr & 0xF); cur_prim = 0; } /* 64 bit encoding used in MemRay and Instances */ operator uint64_t() const { //assert(type == NODE_TYPE_INTERNAL); assert(((uint64_t)node & 0xF) == 0); assert(cur_prim == 0); return (uint64_t)node + (uint64_t) type; } /* returns the internal node that is referenced */ template InternalNode* innerNode() const { assert(type == NODE_TYPE_INTERNAL); return (InternalNode*)node; } /* returns the instance leaf node that is referenced */ InstanceLeaf* leafNodeInstance() const { assert(type == NODE_TYPE_INSTANCE); return (InstanceLeaf*)node; } /* returns the quad leaf node that is referenced */ QuadLeaf* leafNodeQuad() const { assert(type == NODE_TYPE_QUAD); return (QuadLeaf*)node; } /* returns the procedural leaf node that is referenced */ ProceduralLeaf* leafNodeProcedural() const { assert(type == NODE_TYPE_PROCEDURAL); return (ProceduralLeaf*)node; } friend bool operator ==(const NodeRef& a, const NodeRef& b) { return (a.node == b.node) && (a.type == b.type) && (a.cur_prim == b.cur_prim); } friend bool operator !=(const NodeRef& a, const NodeRef& b) { return !(a == b); } #if !defined(__RTRT_GSIM) friend inline std::ostream& operator<<(std::ostream& _cout, const NodeRef& node) { return _cout << "NodeRef { " << (void*)node.node << ", " << node.type << ", " << (int)node.cur_prim << " }"; } #endif public: char* node; // pointer to the referenced node NodeType type; // type of the node referenced uint8_t cur_prim : 4; // current primitive referenced in 
the leaf }; /* The internal nodes of the BVH store references to 6 children and quantized bounds for each of these children. All children are stored consecutively in memory at a location refered to by the childOffset. To calculate the relative location of the i'th child the size (as encoded in blockIncr) of all the children with index smaller than i has to get added to that childOffset. The calculated offset specifies the signed number of 64 bytes blocks relative to the node address to reach the child. If the nodeType is INTERNAL we are in mixed mode and the type of each child is encoded inside the startPrim member. Otherwise we are in fat leaf mode and each child has the same type 'nodeType' and startPrim identifies the primitive where the leaf starts. The leaf spans all primitives from this start primitive to the end primitive which is marked as 'last'. The bounding boxes of the children are quantized into a regular 3D grid. The world space position of the origin of that grid is stored at full precision in the lower member, while the step size is encoded in the exp_x, exp_y, and exp_z members as power of 2. Thus grid coordinates together with their exponent (xi,exp_x), (yi,exp_y), (zi,exp_z) correspond to the mantissa and exponent of a floating point number representation without leading zero. Thus the world space position of the bounding planes can get calculated as follows: x = lower.x + pow(2,exp_x) * 0.xi y = lower.y + pow(2,exp_y) * 0.yi z = lower.z + pow(2,exp_z) * 0.zi As the stored grid coordinates for child bounds are only unsigned 8-bit values, ray/box intersections can get performed with reduced precision. The node also stores a mask used for ray filtering. Only rays with (node.nodeMask & ray.rayMask) != 0 are traversed, all others are culled. 
*/ struct InternalNode6Data { static constexpr uint32_t NUM_CHILDREN = 6; Vec3f lower; // world space origin of quantization grid int32_t childOffset; // offset to all children in 64B multiples NodeType nodeType; // the type of the node uint8_t pad; // unused byte int8_t exp_x; // 2^exp_x is the size of the grid in x dimension int8_t exp_y; // 2^exp_y is the size of the grid in y dimension int8_t exp_z; // 2^exp_z is the size of the grid in z dimension uint8_t nodeMask; // mask used for ray filtering struct ChildData { uint8_t blockIncr : 2; // size of child in 64 byte blocks uint8_t startPrim : 4; // start primitive in fat leaf mode or child type in mixed mode uint8_t pad : 2; // unused bits } childData[NUM_CHILDREN]; uint8_t lower_x[NUM_CHILDREN]; // the quantized lower bounds in x-dimension uint8_t upper_x[NUM_CHILDREN]; // the quantized upper bounds in x-dimension uint8_t lower_y[NUM_CHILDREN]; // the quantized lower bounds in y-dimension uint8_t upper_y[NUM_CHILDREN]; // the quantized upper bounds in y-dimension uint8_t lower_z[NUM_CHILDREN]; // the quantized lower bounds in z-dimension uint8_t upper_z[NUM_CHILDREN]; // the quantized upper bounds in z-dimension }; static_assert(sizeof(InternalNode6Data) == 64, "InternalNode6Data must be 64 bytes large"); template struct InternalNodeCommon : public InternalNodeData { using InternalNodeData::NUM_CHILDREN; InternalNodeCommon() { } InternalNodeCommon(NodeType type) { this->nodeType = type; this->childOffset = 0; this->nodeMask = 0xFF; for (uint32_t i = 0; i < InternalNodeData::NUM_CHILDREN; i++) this->childData[i] = { 0, 0, 0 }; this->lower = Vec3f(0.0f); this->exp_x = 0; this->exp_y = 0; this->exp_z = 0; /* set all child bounds to invalid */ for (uint32_t i = 0; i < InternalNodeData::NUM_CHILDREN; i++) { this->lower_x[i] = this->lower_y[i] = this->lower_z[i] = 0x80; this->upper_x[i] = this->upper_y[i] = this->upper_z[i] = 0x00; } } /* this function slightly enlarges bounds in order to make traversal watertight */ 
static const BBox3f conservativeBox(const BBox3f box, float ulps = 1.0f) { const float err = ulps*std::numeric_limits::epsilon() * std::max(reduce_max(abs(box.lower)), reduce_max(abs(box.upper))); return enlarge(box, Vec3f(err)); } /* this function quantizes the provided bounds */ const BBox3f quantize_bounds(BBox3f fbounds, Vec3f base) const { const Vec3f lower = fbounds.lower-base; const Vec3f upper = fbounds.upper-base; float qlower_x = ldexpf(lower.x, -this->exp_x + 8); float qlower_y = ldexpf(lower.y, -this->exp_y + 8); float qlower_z = ldexpf(lower.z, -this->exp_z + 8); float qupper_x = ldexpf(upper.x, -this->exp_x + 8); float qupper_y = ldexpf(upper.y, -this->exp_y + 8); float qupper_z = ldexpf(upper.z, -this->exp_z + 8); assert(qlower_x >= 0.0f && qlower_x <= 255.0f); assert(qlower_y >= 0.0f && qlower_y <= 255.0f); assert(qlower_z >= 0.0f && qlower_z <= 255.0f); assert(qupper_x >= 0.0f && qupper_x <= 255.0f); assert(qupper_y >= 0.0f && qupper_y <= 255.0f); assert(qupper_z >= 0.0f && qupper_z <= 255.0f); qlower_x = min(max(floorf(qlower_x),0.0f),255.0f); qlower_y = min(max(floorf(qlower_y),0.0f),255.0f); qlower_z = min(max(floorf(qlower_z),0.0f),255.0f); qupper_x = min(max(ceilf(qupper_x),0.0f),255.0f); qupper_y = min(max(ceilf(qupper_y),0.0f),255.0f); qupper_z = min(max(ceilf(qupper_z),0.0f),255.0f); BBox3f qbounds(Vec3f(qlower_x, qlower_y, qlower_z), Vec3f(qupper_x, qupper_y, qupper_z)); /* verify that quantized bounds are conservative */ BBox3f dbounds = dequantize_bounds(qbounds, base); dbounds.lower.x -= 2.0f*float(ulp) * (fabs(base.x) + ldexpf(255.0f,this->exp_x-8)); dbounds.lower.y -= 2.0f*float(ulp) * (fabs(base.y) + ldexpf(255.0f,this->exp_y-8)); dbounds.lower.z -= 2.0f*float(ulp) * (fabs(base.z) + ldexpf(255.0f,this->exp_z-8)); dbounds.upper.x += 2.0f*float(ulp) * (fabs(base.x) + ldexpf(255.0f,this->exp_x-8)); dbounds.upper.y += 2.0f*float(ulp) * (fabs(base.y) + ldexpf(255.0f,this->exp_y-8)); dbounds.upper.z += 2.0f*float(ulp) * (fabs(base.z) + 
ldexpf(255.0f,this->exp_z-8)); assert(subset(fbounds, dbounds)); return qbounds; } /* this function de-quantizes the provided bounds */ const BBox3f dequantize_bounds(const BBox3f& qbounds, Vec3f base) const { const float dlower_x = base.x + ldexpf(qbounds.lower.x, this->exp_x - 8); const float dlower_y = base.y + ldexpf(qbounds.lower.y, this->exp_y - 8); const float dlower_z = base.z + ldexpf(qbounds.lower.z, this->exp_z - 8); const float dupper_x = base.x + ldexpf(qbounds.upper.x, this->exp_x - 8); const float dupper_y = base.y + ldexpf(qbounds.upper.y, this->exp_y - 8); const float dupper_z = base.z + ldexpf(qbounds.upper.z, this->exp_z - 8); return BBox3f(Vec3f(dlower_x, dlower_y, dlower_z), Vec3f(dupper_x, dupper_y, dupper_z)); } /* Determines if a child is valid. We have only to look at the * topmost bit of lower_x and upper_x to determine if child is * valid */ bool valid(int i) const { return !(this->lower_x[i] & 0x80) || (this->upper_x[i] & 0x80); } /* Determines if the node is in fat leaf mode. */ bool isFatLeaf() const { return this->nodeType != NODE_TYPE_MIXED; } /* Sets the offset to the child memory. */ void setChildOffset(void* childDataPtr) { int64_t childDataOffset = childDataPtr ? 
(char*)childDataPtr - (char*)this : 0; assert(childDataOffset % 64 == 0); assert((int64_t)(int32_t)(childDataOffset / 64) == (childDataOffset / 64)); this->childOffset = (int32_t)(childDataOffset / 64); } /* Sets the type, size, and current primitive of a child */ void setChildType(uint32_t child, NodeType childType, uint32_t block_delta, uint32_t cur_prim) { // there is no need to store block_delta for last child if (child == NUM_CHILDREN-1) block_delta = 0; assert(block_delta < 4); assert(cur_prim < 16); if (isFatLeaf()) { assert(this->nodeType == childType); this->childData[child].startPrim = cur_prim; this->childData[child].blockIncr = block_delta; } else { assert(cur_prim == 0); this->childData[child].startPrim = childType; this->childData[child].blockIncr = block_delta; } } void invalidateChild(uint32_t childID) { /* set child bounds to invalid */ this->lower_x[childID] = this->lower_y[childID] = this->lower_z[childID] = 0x80; this->upper_x[childID] = this->upper_y[childID] = this->upper_z[childID] = 0x00; } /* Sets child bounds */ void setChildBounds(uint32_t childID, const BBox3f& fbounds) { assert(fbounds.lower.x <= fbounds.upper.x); assert(fbounds.lower.y <= fbounds.upper.y); assert(fbounds.lower.z <= fbounds.upper.z); const BBox3f qbounds = quantize_bounds(conservativeBox(fbounds), this->lower); this->lower_x[childID] = (uint8_t)qbounds.lower.x; this->lower_y[childID] = (uint8_t)qbounds.lower.y; this->lower_z[childID] = (uint8_t)qbounds.lower.z; this->upper_x[childID] = (uint8_t)qbounds.upper.x; this->upper_y[childID] = (uint8_t)qbounds.upper.y; this->upper_z[childID] = (uint8_t)qbounds.upper.z; assert(valid(childID)); } /* Sets an entire child, including bounds, type, size, and referenced primitive. */ void setChild(uint32_t childID, const BBox3f& fbounds, NodeType type, uint32_t block_delta, uint32_t cur_prim = 0) { setChildType(childID, type, block_delta, cur_prim); setChildBounds(childID, fbounds); } /* Calculates the byte offset to the child. 
The offset is * relative to the address this node. */ int64_t getChildOffset(uint32_t childID) const { int64_t ofs = this->childOffset; for (uint32_t j = 0; j < childID; j++) ofs += this->childData[j].blockIncr; return 64 * ofs; } /* Returns the type of the child. In fat leaf mode the type is * shared between all children, otherwise a per-child type is * encoded inside the startPrim member for each child. */ NodeType getChildType(uint32_t childID) const { if (isFatLeaf()) return this->nodeType; else return (NodeType)(this->childData[childID].startPrim); } /* Returns the start primitive of a child. In case of children * in fat-leaf mode, all children are leaves, and the start * primitive specifies the primitive in a leaf block where the * leaf start. */ uint32_t getChildStartPrim(uint32_t childID) const { if (isFatLeaf()) return this->childData[childID].startPrim; else return 0; } /* Returns a node reference for the given child. This reference * includes the node pointer, type, and start primitive. */ NodeRef child(void* This, int childID) const { return NodeRef((char*)This + getChildOffset(childID), getChildType(childID), getChildStartPrim(childID)); } NodeRef child(int i) const { return child((void*)this, i); } }; template struct InternalNode : public InternalNodeCommon { using InternalNodeCommon::valid; using InternalNodeCommon::getChildType; using InternalNodeCommon::getChildOffset; using InternalNodeCommon::getChildStartPrim; using InternalNodeCommon::conservativeBox; using InternalNodeCommon::dequantize_bounds; using InternalNodeCommon::NUM_CHILDREN; InternalNode() { } InternalNode (NodeType type) : InternalNodeCommon(type) {} /* Constructs an internal node. The quantization grid gets * initialized from the provided parent bounds. 
*/ InternalNode (BBox3f box, NodeType type = NODE_TYPE_MIXED) : InternalNode(type) { setNodeBounds(box); } void setNodeBounds(BBox3f box) { /* initialize quantization grid */ box = conservativeBox(box); const float _ulp = std::numeric_limits::epsilon(); const float up = 1.0f + float(_ulp); Vec3f len = box.size() * up; this->lower = box.lower; #if defined(__INTEL_LLVM_COMPILER) && defined(WIN32) int _exp_x; float mant_x = embree_frexp(len.x, &_exp_x); _exp_x += (mant_x > 255.0f / 256.0f); int _exp_y; float mant_y = embree_frexp(len.y, &_exp_y); _exp_y += (mant_y > 255.0f / 256.0f); int _exp_z; float mant_z = embree_frexp(len.z, &_exp_z); _exp_z += (mant_z > 255.0f / 256.0f); #else int _exp_x; float mant_x = frexp(len.x, &_exp_x); _exp_x += (mant_x > 255.0f / 256.0f); int _exp_y; float mant_y = frexp(len.y, &_exp_y); _exp_y += (mant_y > 255.0f / 256.0f); int _exp_z; float mant_z = frexp(len.z, &_exp_z); _exp_z += (mant_z > 255.0f / 256.0f); #endif _exp_x = max(-128,_exp_x); // enlarge too tight bounds _exp_y = max(-128,_exp_y); _exp_z = max(-128,_exp_z); this->exp_x = _exp_x; assert(_exp_x >= -128 && _exp_x <= 127); this->exp_y = _exp_y; assert(_exp_y >= -128 && _exp_y <= 127); this->exp_z = _exp_z; assert(_exp_z >= -128 && _exp_z <= 127); } /* dequantizes the bounds of the specified child */ const BBox3f bounds(uint32_t childID) const { return dequantize_bounds(BBox3f(Vec3f(this->lower_x[childID], this->lower_y[childID], this->lower_z[childID]), Vec3f(this->upper_x[childID], this->upper_y[childID], this->upper_z[childID])), this->lower); } const BBox3f bounds() const { BBox3f b = empty; for (size_t i=0; isetChildOffset((char*)this + getChildOffset(0)); } #if !defined(__RTRT_GSIM) /* output of internal node */ void print(std::ostream& cout, uint32_t depth, bool close) const { cout << tab(depth) << "InternalNode" << NUM_CHILDREN << " {" << std::endl; cout << tab(depth) << " addr = " << this << std::endl; cout << tab(depth) << " childOffset = " << 64 * 
int64_t(this->childOffset) << std::endl; cout << tab(depth) << " nodeType = " << NodeType(this->nodeType) << std::endl; cout << tab(depth) << " nodeMask = " << std::bitset<8>(this->nodeMask) << std::endl; for (uint32_t i = 0; i < NUM_CHILDREN; i++) { cout << tab(depth) << " child" << i << " = { "; if (valid(i)) { cout << "type = " << getChildType(i); cout << ", offset = " << getChildOffset(i); cout << ", prim = " << getChildStartPrim(i); cout << ", bounds = " << bounds(i); } else { cout << "INVALID"; } cout << " }" << std::endl; } if (close) cout << tab(depth) << "}"; } /* output operator for internal node */ friend inline std::ostream& operator<<(std::ostream& cout, const InternalNode& node) { node.print(cout, 0, true); return cout; } #endif }; inline size_t GetInternalNodeSize(uint32_t numChildren) { if (numChildren <= 6) return sizeof(InternalNode6Data); else assert(false); return 0; } typedef InternalNode InternalNode6; } level-zero-raytracing-support-1.2.3/rtbuild/quadifier.h000066400000000000000000000107121514453371700232560ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #if defined(ZE_RAYTRACING) #include "sys/sysinfo.h" #include "sys/vector.h" #include "math/vec2.h" #include "math/vec3.h" #include "math/bbox.h" #include "math/affinespace.h" #else #include "../../common/default.h" #endif namespace embree { enum QuadifierType : uint16_t { QUADIFIER_PAIRED = 0xFFFF, // indicates that triangle is paired with a previous triangle QUADIFIER_TRIANGLE = 0, // indicates that this triangle cannot get paired QUADIFIER_QUAD = 1, // all values > 0 and != 0xFFFF indicate offset to paired triangle QUADIFIER_MAX_DISTANCE = 31, }; template struct static_deque { __forceinline Ty pop_front() { assert(size()); return operator[](begin++); } __forceinline void push_back(const Ty& v) { assert(size() < N); operator[](end++) = v; } __forceinline size_t size() const { assert(end >= begin); return end-begin; } 
__forceinline bool full() const { return size() == N; } __forceinline void erase( size_t j ) { assert(j >= begin && j < end); /* fast path as we mostly just merge with the subsequent triangle */ if (likely(j == begin)) begin++; /* fastest when left side is small */ else if (j-begin < end-j-1) { for (size_t i=j; i>=begin+1; i--) operator[](i) = operator[](i-1); begin++; } /* fastest if right side is small */ else { for (size_t i=j+1; i a, Vec3 b, uint8_t& lb0, uint8_t& lb1, uint8_t& lb2) { const vuint<4> va(a.x,a.y,a.z,0); const vboolf<4> mb0 = vboolf<4>(0x8) | vuint<4>(b.x) == va; const vboolf<4> mb1 = vboolf<4>(0x8) | vuint<4>(b.y) == va; const vboolf<4> mb2 = vboolf<4>(0x8) | vuint<4>(b.z) == va; lb0 = bsf(movemask(mb0)); lb1 = bsf(movemask(mb1)); lb2 = bsf(movemask(mb2)); return (lb0 == 3) + (lb1 == 3) + (lb2 == 3) <= 1; } template __forceinline void merge_triangle_window( uint32_t geomID, static_deque& triangleWindow, QuadifierType* quads_o, const GetTriangleFunc& getTriangle ) { uint32_t primID0 = triangleWindow.pop_front(); /* load first triangle */ Vec3 tri0 = getTriangle(geomID, primID0); /* find a second triangle in triangle window to pair with */ for ( size_t slot = triangleWindow.begin; slot != triangleWindow.end; ++slot ) { /* load second triangle */ uint32_t primID1 = triangleWindow[slot]; Vec3 tri1 = getTriangle(geomID, primID1); /* try to pair triangles */ uint8_t lb0,lb1,lb2; bool pair = pair_triangles(tri0,tri1,lb0,lb1,lb2); /* the offset between the triangles cannot be too large as hardware limits bits for offset encode */ uint32_t prim_offset = primID1 - primID0; pair &= prim_offset <= QUADIFIER_MAX_DISTANCE; /* store pairing if successful */ if (pair) { assert(prim_offset > 0 && prim_offset < QUADIFIER_PAIRED); quads_o[primID0] = (QuadifierType) prim_offset; quads_o[primID1] = QUADIFIER_PAIRED; triangleWindow.erase(slot); return; } } /* make a triangle if we fail to find a candiate to pair with */ quads_o[primID0] = QUADIFIER_TRIANGLE; } 
template inline size_t pair_triangles( uint32_t geomID, QuadifierType* quads_o, uint32_t primID0, uint32_t primID1, const GetTriangleFunc& getTriangle ) { static_deque triangleWindow; size_t numTrianglePairs = 0; for (uint32_t primID=primID0; primIDtriangleCount); return *(ze_rtas_triangle_indices_uint32_exp_t*)((char*)geom->pTriangleBuffer + uint64_t(primID)*geom->triangleStride); } inline Vec3f getVertex(const ze_rtas_builder_triangles_geometry_info_exp_t* geom, uint32_t vertexID) { assert(vertexID < geom->vertexCount); return *(Vec3f*)((char*)geom->pVertexBuffer + uint64_t(vertexID)*geom->vertexStride); } inline ze_rtas_quad_indices_uint32_exp_t getPrimitive(const ze_rtas_builder_quads_geometry_info_exp_t* geom, uint32_t primID) { assert(primID < geom->quadCount); return *(ze_rtas_quad_indices_uint32_exp_t*)((char*)geom->pQuadBuffer + uint64_t(primID)*geom->quadStride); } inline Vec3f getVertex(const ze_rtas_builder_quads_geometry_info_exp_t* geom, uint32_t vertexID) { assert(vertexID < geom->vertexCount); return *(Vec3f*)((char*)geom->pVertexBuffer + uint64_t(vertexID)*geom->vertexStride); } inline AffineSpace3fa getTransform(const ze_rtas_builder_instance_geometry_info_exp_t* geom) { switch (geom->transformFormat) { case ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3X4_COLUMN_MAJOR: { const ze_rtas_transform_float3x4_column_major_exp_t* xfm = (const ze_rtas_transform_float3x4_column_major_exp_t*) geom->pTransform; return { { xfm->vx_x, xfm->vx_y, xfm->vx_z }, { xfm->vy_x, xfm->vy_y, xfm->vy_z }, { xfm->vz_x, xfm->vz_y, xfm->vz_z }, { xfm-> p_x, xfm-> p_y, xfm-> p_z } }; } case ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3X4_ALIGNED_COLUMN_MAJOR: { const ze_rtas_transform_float3x4_aligned_column_major_exp_t* xfm = (const ze_rtas_transform_float3x4_aligned_column_major_exp_t*) geom->pTransform; return { { xfm->vx_x, xfm->vx_y, xfm->vx_z }, { xfm->vy_x, xfm->vy_y, xfm->vy_z }, { xfm->vz_x, xfm->vz_y, xfm->vz_z }, { xfm-> p_x, xfm-> p_y, xfm-> p_z } }; } case 
ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3X4_ROW_MAJOR: { const ze_rtas_transform_float3x4_row_major_exp_t* xfm = (const ze_rtas_transform_float3x4_row_major_exp_t*) geom->pTransform; return { { xfm->vx_x, xfm->vx_y, xfm->vx_z }, { xfm->vy_x, xfm->vy_y, xfm->vy_z }, { xfm->vz_x, xfm->vz_y, xfm->vz_z }, { xfm-> p_x, xfm-> p_y, xfm-> p_z } }; } default: throw std::runtime_error("invalid transform format"); } } inline void verifyGeometryDesc(const ze_rtas_builder_triangles_geometry_info_exp_t* geom) { if (geom->triangleFormat != ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_TRIANGLE_INDICES_UINT32) throw std::runtime_error("triangle format must be ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_TRIANGLE_INDICES_UINT32"); if (geom->vertexFormat != ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3) throw std::runtime_error("vertex format must be ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3"); if (geom->triangleCount && geom->pTriangleBuffer == nullptr) throw std::runtime_error("no triangle buffer specified"); if (geom->vertexCount && geom->pVertexBuffer == nullptr) throw std::runtime_error("no vertex buffer specified"); } inline void verifyGeometryDesc(const ze_rtas_builder_quads_geometry_info_exp_t* geom) { if (geom->quadFormat != ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_QUAD_INDICES_UINT32) throw std::runtime_error("quad format must be ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_QUAD_INDICES_UINT32"); if (geom->vertexFormat != ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3) throw std::runtime_error("vertex format must be ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3"); if (geom->quadCount && geom->pQuadBuffer == nullptr) throw std::runtime_error("no quad buffer specified"); if (geom->vertexCount && geom->pVertexBuffer == nullptr) throw std::runtime_error("no vertex buffer specified"); } inline void verifyGeometryDesc(const ze_rtas_builder_procedural_geometry_info_exp_t* geom) { if (geom->primCount && geom->pfnGetBoundsCb == nullptr) throw std::runtime_error("no bounds function specified"); if 
(geom->reserved != 0) throw std::runtime_error("reserved value must be zero"); } inline void verifyGeometryDesc(const ze_rtas_builder_instance_geometry_info_exp_t* geom) { if (geom->pTransform == nullptr) throw std::runtime_error("no instance transformation specified"); if (geom->pBounds == nullptr) throw std::runtime_error("no acceleration structure bounds specified"); if (geom->pAccelerationStructure == nullptr) throw std::runtime_error("no acceleration structure to instantiate specified"); } inline bool buildBounds(API_TY aty, const ze_rtas_builder_triangles_geometry_info_exp_t* geom, uint32_t primID, BBox3fa& bbox, void* buildUserPtr) { if (primID >= geom->triangleCount) return false; const ze_rtas_triangle_indices_uint32_exp_t tri = getPrimitive(geom,primID); if (unlikely(tri.v0 >= geom->vertexCount)) return false; if (unlikely(tri.v1 >= geom->vertexCount)) return false; if (unlikely(tri.v2 >= geom->vertexCount)) return false; const Vec3f p0 = getVertex(geom,tri.v0); const Vec3f p1 = getVertex(geom,tri.v1); const Vec3f p2 = getVertex(geom,tri.v2); if (unlikely(!isvalid(p0))) return false; if (unlikely(!isvalid(p1))) return false; if (unlikely(!isvalid(p2))) return false; bbox = BBox3fa(min(p0,p1,p2),max(p0,p1,p2)); return true; } inline bool buildBounds(API_TY aty, const ze_rtas_builder_quads_geometry_info_exp_t* geom, uint32_t primID, BBox3fa& bbox, void* buildUserPtr) { if (primID >= geom->quadCount) return false; const ze_rtas_quad_indices_uint32_exp_t tri = getPrimitive(geom,primID); if (unlikely(tri.v0 >= geom->vertexCount)) return false; if (unlikely(tri.v1 >= geom->vertexCount)) return false; if (unlikely(tri.v2 >= geom->vertexCount)) return false; if (unlikely(tri.v3 >= geom->vertexCount)) return false; const Vec3f p0 = getVertex(geom,tri.v0); const Vec3f p1 = getVertex(geom,tri.v1); const Vec3f p2 = getVertex(geom,tri.v2); const Vec3f p3 = getVertex(geom,tri.v3); if (unlikely(!isvalid(p0))) return false; if (unlikely(!isvalid(p1))) return false; if 
(unlikely(!isvalid(p2))) return false; if (unlikely(!isvalid(p3))) return false; bbox = BBox3fa(min(p0,p1,p2,p3),max(p0,p1,p2,p3)); return true; } inline bool buildBounds(API_TY aty, const ze_rtas_builder_procedural_geometry_info_exp_t* geom, uint32_t primID, BBox3fa& bbox, void* buildUserPtr) { if (primID >= geom->primCount) return false; if (geom->pfnGetBoundsCb == nullptr) return false; BBox3f bounds; ze_rtas_geometry_aabbs_exp_cb_params_t params = { ZE_STRUCTURE_TYPE_RTAS_GEOMETRY_AABBS_EXP_CB_PARAMS }; params.primID = primID; params.primIDCount = 1; params.pGeomUserPtr = geom->pGeomUserPtr; params.pBuildUserPtr = buildUserPtr; params.pBoundsOut = (ze_rtas_aabb_exp_t*) &bounds; (geom->pfnGetBoundsCb)(¶ms); if (unlikely(!isvalid(bounds.lower))) return false; if (unlikely(!isvalid(bounds.upper))) return false; if (unlikely(bounds.empty())) return false; bbox = (BBox3f&) bounds; return true; } inline bool buildBounds(API_TY aty, const ze_rtas_builder_instance_geometry_info_exp_t* geom, uint32_t primID, BBox3fa& bbox, void* buildUserPtr) { if (primID >= 1) return false; if (geom->pAccelerationStructure == nullptr) return false; if (geom->pTransform == nullptr) return false; const AffineSpace3fa local2world = getTransform(geom); const Vec3fa lower(geom->pBounds->lower.x,geom->pBounds->lower.y,geom->pBounds->lower.z); const Vec3fa upper(geom->pBounds->upper.x,geom->pBounds->upper.y,geom->pBounds->upper.z); const BBox3fa bounds = xfmBounds(local2world,BBox3fa(lower,upper)); if (unlikely(!isvalid(bounds.lower))) return false; if (unlikely(!isvalid(bounds.upper))) return false; if (unlikely(bounds.empty())) return false; bbox = bounds; return true; } template PrimInfo createGeometryPrimRefArray(API_TY aty, const GeometryType* geom, void* buildUserPtr, evector& prims, const range& r, size_t k, unsigned int geomID) { PrimInfo pinfo(empty); for (uint32_t primID=r.begin(); primIDpNext == nullptr) return true; desc = (zet_base_desc_t_*) desc->pNext; } return false; } struct 
ze_rtas_builder { ze_rtas_builder () { } ~ze_rtas_builder() { magick = 0x0; } bool verify() const { return magick == MAGICK; } enum { MAGICK = 0x45FE67E1 }; uint32_t magick = MAGICK; }; ze_result_t validate(API_TY aty, ze_rtas_builder_exp_handle_t hBuilder) { if (hBuilder == nullptr) return ZE_RESULT_ERROR_INVALID_NULL_HANDLE; if (!((ze_rtas_builder*)hBuilder)->verify()) return ZE_RESULT_ERROR_INVALID_ARGUMENT; return ZE_RESULT_SUCCESS; } struct ze_rtas_parallel_operation_t { ze_rtas_parallel_operation_t() { } ~ze_rtas_parallel_operation_t() { magick = 0x0; } ze_result_t verify() const { if (magick != MAGICK) return ZE_RESULT_ERROR_INVALID_ARGUMENT; return ZE_RESULT_SUCCESS; } enum { MAGICK = 0xE84567E1 }; uint32_t magick = MAGICK; std::atomic object_in_use = false; ze_result_t errorCode = ZE_RESULT_SUCCESS; tbb::task_group group; }; ze_result_t validate(API_TY aty, ze_rtas_parallel_operation_exp_handle_t hParallelOperation) { if (hParallelOperation == nullptr) return ZE_RESULT_ERROR_INVALID_NULL_HANDLE; return ((ze_rtas_parallel_operation_t*)hParallelOperation)->verify(); } ze_result_t validate(API_TY aty, const ze_rtas_builder_exp_desc_t* pDescriptor) { if (pDescriptor == nullptr) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; if (pDescriptor->stype != ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_DESC) return ZE_RESULT_ERROR_INVALID_ENUMERATION; if (!checkDescChain((zet_base_desc_t_*)pDescriptor)) return ZE_RESULT_ERROR_INVALID_ENUMERATION; if (uint32_t(ZE_RTAS_BUILDER_EXP_VERSION_CURRENT) < uint32_t(pDescriptor->builderVersion)) return ZE_RESULT_ERROR_INVALID_ENUMERATION; return ZE_RESULT_SUCCESS; } ze_result_t validate(API_TY aty, ze_rtas_device_exp_properties_t* pProperties) { if (pProperties == nullptr) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; if (pProperties->stype != ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXP_PROPERTIES) return ZE_RESULT_ERROR_INVALID_ENUMERATION; if (!checkDescChain((zet_base_desc_t_*)pProperties)) return ZE_RESULT_ERROR_INVALID_ENUMERATION; return 
ZE_RESULT_SUCCESS; } ze_result_t validate(API_TY aty, ze_rtas_format_exp_t rtasFormat) { if (rtasFormat == ZE_RTAS_FORMAT_EXP_INVALID) return ZE_RESULT_ERROR_INVALID_ENUMERATION; if (uint32_t(rtasFormat) > uint32_t(ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_MAX)) return ZE_RESULT_ERROR_INVALID_ENUMERATION; return ZE_RESULT_SUCCESS; } ze_result_t validate(API_TY aty, const ze_rtas_builder_build_op_exp_desc_t* args) { /* check for valid pointers */ if (args == nullptr) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; /* check if input descriptor has proper type */ if (args->stype != ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_EXP_DESC) return ZE_RESULT_ERROR_INVALID_ENUMERATION; /* check valid pNext chain */ if (!checkDescChain((zet_base_desc_t_*)args)) return ZE_RESULT_ERROR_INVALID_ENUMERATION; /* check if acceleration structure format is supported */ VALIDATE(aty,args->rtasFormat); /* check for valid geometries array */ if (args->ppGeometries == nullptr && args->numGeometries > 0) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; /* validate that number of geometries are in range */ if (args->numGeometries > 0x00FFFFFF) return ZE_RESULT_ERROR_INVALID_ENUMERATION; /* validate build quality */ if (args->buildQuality < 0 || ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXP_HIGH < args->buildQuality) return ZE_RESULT_ERROR_INVALID_ENUMERATION; /* validate build flags */ if (args->buildFlags >= (ZE_RTAS_BUILDER_BUILD_OP_EXP_FLAG_NO_DUPLICATE_ANYHIT_INVOCATION<<1)) return ZE_RESULT_ERROR_INVALID_ENUMERATION; return ZE_RESULT_SUCCESS; } ze_result_t validate(API_TY aty, ze_rtas_builder_exp_properties_t* pProp) { /* check for valid pointers */ if (pProp == nullptr) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; /* check if return property has proper type */ if (pProp->stype != ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_PROPERTIES) return ZE_RESULT_ERROR_INVALID_ENUMERATION; /* check valid pNext chain */ if (!checkDescChain((zet_base_desc_t_*)pProp)) return ZE_RESULT_ERROR_INVALID_ENUMERATION; return 
ZE_RESULT_SUCCESS; } ze_result_t validate(API_TY aty, ze_rtas_parallel_operation_exp_properties_t* pProperties) { /* check for valid pointer */ if (pProperties == nullptr) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; /* check for proper property */ if (pProperties->stype != ZE_STRUCTURE_TYPE_RTAS_PARALLEL_OPERATION_EXP_PROPERTIES) return ZE_RESULT_ERROR_INVALID_ENUMERATION; /* check valid pNext chain */ if (!checkDescChain((zet_base_desc_t_*)pProperties)) return ZE_RESULT_ERROR_INVALID_ENUMERATION; return ZE_RESULT_SUCCESS; } ze_result_t zeRTASBuilderCreateImpl(API_TY aty, ze_driver_handle_t hDriver, const ze_rtas_builder_exp_desc_t *pDescriptor, ze_rtas_builder_exp_handle_t *phBuilder) { /* input validation */ VALIDATE(aty,hDriver); VALIDATE(aty,pDescriptor); VALIDATE_PTR(aty,phBuilder); *phBuilder = (ze_rtas_builder_exp_handle_t) new ze_rtas_builder(); return ZE_RESULT_SUCCESS; } ze_result_t zeRTASBuilderDestroyImpl(API_TY aty, ze_rtas_builder_exp_handle_t hBuilder) { VALIDATE(aty,hBuilder); delete (ze_rtas_builder*) hBuilder; return ZE_RESULT_SUCCESS; } ze_result_t zeDriverRTASFormatCompatibilityCheckImpl(API_TY aty, ze_driver_handle_t hDriver, const ze_rtas_format_exp_t accelFormat, const ze_rtas_format_exp_t otherAccelFormat ) { /* input validation */ VALIDATE(aty,hDriver); VALIDATE(aty,accelFormat); VALIDATE(aty,otherAccelFormat); /* check if rtas formats are compatible */ if (accelFormat == otherAccelFormat) return ZE_RESULT_SUCCESS; /* report incompatible format */ return ZE_RESULT_EXP_ERROR_OPERANDS_INCOMPATIBLE; } uint32_t getNumPrimitives(const ze_rtas_builder_geometry_info_exp_t* geom) { switch (geom->geometryType) { case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : return ((ze_rtas_builder_triangles_geometry_info_exp_t*) geom)->triangleCount; case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL : return ((ze_rtas_builder_procedural_geometry_info_exp_t*) geom)->primCount; case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS : return 
((ze_rtas_builder_quads_geometry_info_exp_t*) geom)->quadCount; case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE : return 1; default : return 0; }; } ze_result_t zeRTASBuilderGetBuildPropertiesImpl(API_TY aty, ze_rtas_builder_exp_handle_t hBuilder, const ze_rtas_builder_build_op_exp_desc_t* args, ze_rtas_builder_exp_properties_t* pProp) { /* input validation */ VALIDATE(aty,hBuilder); VALIDATE(aty,args); VALIDATE(aty,pProp); const ze_rtas_builder_geometry_info_exp_t** geometries = args->ppGeometries; const size_t numGeometries = args->numGeometries; auto getSize = [&](uint32_t geomID) -> size_t { const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID]; if (geom == nullptr) return 0; return getNumPrimitives(geom); }; auto getType = [&](unsigned int geomID) { const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID]; assert(geom); switch (geom->geometryType) { case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : return QBVH6BuilderSAH::TRIANGLE; case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS: return QBVH6BuilderSAH::QUAD; case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL: return QBVH6BuilderSAH::PROCEDURAL; case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE: return QBVH6BuilderSAH::INSTANCE; default: throw std::runtime_error("invalid geometry type"); }; }; /* query memory requirements from builder */ size_t expectedBytes = 0; size_t worstCaseBytes = 0; size_t scratchBytes = 0; QBVH6BuilderSAH::estimateSize(numGeometries, getSize, getType, args->rtasFormat, args->buildQuality, args->buildFlags, expectedBytes, worstCaseBytes, scratchBytes); /* fill return struct */ pProp->flags = 0; pProp->rtasBufferSizeBytesExpected = expectedBytes; pProp->rtasBufferSizeBytesMaxRequired = worstCaseBytes; pProp->scratchBufferSizeBytes = scratchBytes; return ZE_RESULT_SUCCESS; } ze_result_t zeRTASBuilderBuildBody(API_TY aty, const ze_rtas_builder_build_op_exp_desc_t* args, void *pScratchBuffer, size_t scratchBufferSizeBytes, void *pRtasBuffer, size_t rtasBufferSizeBytes, 
void *pBuildUserPtr, ze_rtas_aabb_exp_t *pBounds, size_t *pRtasBufferSizeBytes) try { const ze_rtas_builder_geometry_info_exp_t** geometries = args->ppGeometries; const uint32_t numGeometries = args->numGeometries; /* verify input descriptors */ parallel_for(numGeometries,[&](uint32_t geomID) { const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID]; if (geom == nullptr) return; switch (geom->geometryType) { case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : verifyGeometryDesc((ze_rtas_builder_triangles_geometry_info_exp_t*)geom); break; case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS : verifyGeometryDesc((ze_rtas_builder_quads_geometry_info_exp_t* )geom); break; case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL : verifyGeometryDesc((ze_rtas_builder_procedural_geometry_info_exp_t*)geom); break; case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE : verifyGeometryDesc((ze_rtas_builder_instance_geometry_info_exp_t* )geom); break; default: throw std::runtime_error("invalid geometry type"); }; }); auto getSize = [&](uint32_t geomID) -> size_t { const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID]; if (geom == nullptr) return 0; return getNumPrimitives(geom); }; auto getType = [&](unsigned int geomID) { const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID]; assert(geom); switch (geom->geometryType) { case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : return QBVH6BuilderSAH::TRIANGLE; case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS: return QBVH6BuilderSAH::QUAD; case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL: return QBVH6BuilderSAH::PROCEDURAL; case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE: return QBVH6BuilderSAH::INSTANCE; default: throw std::runtime_error("invalid geometry type"); }; }; auto createPrimRefArray = [&] (evector& prims, BBox1f time_range, const range& r, size_t k, unsigned int geomID) -> PrimInfo { const ze_rtas_builder_geometry_info_exp_t* geom = geometries[geomID]; assert(geom); switch (geom->geometryType) { case 
ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES : return createGeometryPrimRefArray(aty,(ze_rtas_builder_triangles_geometry_info_exp_t*)geom,pBuildUserPtr,prims,r,k,geomID); case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS : return createGeometryPrimRefArray(aty,(ze_rtas_builder_quads_geometry_info_exp_t* )geom,pBuildUserPtr,prims,r,k,geomID); case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL: return createGeometryPrimRefArray(aty,(ze_rtas_builder_procedural_geometry_info_exp_t*)geom,pBuildUserPtr,prims,r,k,geomID); case ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE: return createGeometryPrimRefArray(aty,(ze_rtas_builder_instance_geometry_info_exp_t* )geom,pBuildUserPtr,prims,r,k,geomID); default: throw std::runtime_error("invalid geometry type"); }; }; auto convertGeometryFlags = [&] (ze_rtas_builder_packed_geometry_exp_flags_t flags) -> GeometryFlags { return (flags & ZE_RTAS_BUILDER_GEOMETRY_EXP_FLAG_NON_OPAQUE) ? GeometryFlags::NONE : GeometryFlags::OPAQUE; }; auto getTriangle = [&](unsigned int geomID, unsigned int primID) { const ze_rtas_builder_triangles_geometry_info_exp_t* geom = (const ze_rtas_builder_triangles_geometry_info_exp_t*) geometries[geomID]; assert(geom); const ze_rtas_triangle_indices_uint32_exp_t tri = getPrimitive(geom,primID); if (unlikely(tri.v0 >= geom->vertexCount)) return QBVH6BuilderSAH::Triangle(); if (unlikely(tri.v1 >= geom->vertexCount)) return QBVH6BuilderSAH::Triangle(); if (unlikely(tri.v2 >= geom->vertexCount)) return QBVH6BuilderSAH::Triangle(); const Vec3f p0 = getVertex(geom,tri.v0); const Vec3f p1 = getVertex(geom,tri.v1); const Vec3f p2 = getVertex(geom,tri.v2); if (unlikely(!isvalid(p0))) return QBVH6BuilderSAH::Triangle(); if (unlikely(!isvalid(p1))) return QBVH6BuilderSAH::Triangle(); if (unlikely(!isvalid(p2))) return QBVH6BuilderSAH::Triangle(); const GeometryFlags gflags = convertGeometryFlags(geom->geometryFlags); return QBVH6BuilderSAH::Triangle(tri.v0,tri.v1,tri.v2,p0,p1,p2,gflags,geom->geometryMask); }; auto 
getTriangleIndices = [&] (uint32_t geomID, uint32_t primID) { const ze_rtas_builder_triangles_geometry_info_exp_t* geom = (const ze_rtas_builder_triangles_geometry_info_exp_t*) geometries[geomID]; assert(geom); const ze_rtas_triangle_indices_uint32_exp_t tri = getPrimitive(geom,primID); return Vec3(tri.v0,tri.v1,tri.v2); }; auto getQuad = [&](unsigned int geomID, unsigned int primID) { const ze_rtas_builder_quads_geometry_info_exp_t* geom = (const ze_rtas_builder_quads_geometry_info_exp_t*) geometries[geomID]; assert(geom); const ze_rtas_quad_indices_uint32_exp_t quad = getPrimitive(geom,primID); const Vec3f p0 = getVertex(geom,quad.v0); const Vec3f p1 = getVertex(geom,quad.v1); const Vec3f p2 = getVertex(geom,quad.v2); const Vec3f p3 = getVertex(geom,quad.v3); const GeometryFlags gflags = convertGeometryFlags(geom->geometryFlags); return QBVH6BuilderSAH::Quad(p0,p1,p2,p3,gflags,geom->geometryMask); }; auto getProcedural = [&](unsigned int geomID, unsigned int primID) { const ze_rtas_builder_procedural_geometry_info_exp_t* geom = (const ze_rtas_builder_procedural_geometry_info_exp_t*) geometries[geomID]; assert(geom); return QBVH6BuilderSAH::Procedural(geom->geometryMask); // FIXME: pass gflags }; auto getInstance = [&](unsigned int geomID, unsigned int primID) { assert(geometries[geomID]); assert(geometries[geomID]->geometryType == ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE); const ze_rtas_builder_instance_geometry_info_exp_t* geom = (const ze_rtas_builder_instance_geometry_info_exp_t*) geometries[geomID]; void* accel = geom->pAccelerationStructure; const AffineSpace3fa local2world = getTransform(geom); return QBVH6BuilderSAH::Instance(local2world,accel,geom->geometryMask,geom->instanceUserID); // FIXME: pass instance flags }; /* dispatch globals ptr for debugging purposes */ void* dispatchGlobalsPtr = nullptr; #if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS) if (args->pNext) { zet_base_desc_t_* next = (zet_base_desc_t_*) args->pNext; if (next->stype == 
ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_DEBUG_DESC) { ze_rtas_builder_build_op_debug_desc_t* debug_ext = (ze_rtas_builder_build_op_debug_desc_t*) next; dispatchGlobalsPtr = debug_ext->dispatchGlobalsPtr; } } #endif bool verbose = false; bool success = QBVH6BuilderSAH::build(numGeometries, nullptr, getSize, getType, createPrimRefArray, getTriangle, getTriangleIndices, getQuad, getProcedural, getInstance, (char*)pRtasBuffer, rtasBufferSizeBytes, pScratchBuffer, scratchBufferSizeBytes, (BBox3f*) pBounds, pRtasBufferSizeBytes, args->rtasFormat, args->buildQuality, args->buildFlags, verbose, dispatchGlobalsPtr); if (!success) { return ZE_RESULT_EXP_RTAS_BUILD_RETRY; } return ZE_RESULT_SUCCESS; } catch (std::exception& e) { //std::cerr << "caught exception during BVH build: " << e.what() << std::endl; return ZE_RESULT_ERROR_UNKNOWN; } ze_result_t zeRTASBuilderBuildImpl(API_TY aty, ze_rtas_builder_exp_handle_t hBuilder, const ze_rtas_builder_build_op_exp_desc_t* args, void *pScratchBuffer, size_t scratchBufferSizeBytes, void *pRtasBuffer, size_t rtasBufferSizeBytes, ze_rtas_parallel_operation_exp_handle_t hParallelOperation, void *pBuildUserPtr, ze_rtas_aabb_exp_t *pBounds, size_t *pRtasBufferSizeBytes) { /* input validation */ VALIDATE(aty,hBuilder); VALIDATE(aty,args); VALIDATE_PTR(aty,pScratchBuffer); VALIDATE_PTR(aty,pRtasBuffer); /* if parallel operation is provided then execute using thread arena inside task group ... */ if (hParallelOperation) { VALIDATE(aty,hParallelOperation); ze_rtas_parallel_operation_t* op = (ze_rtas_parallel_operation_t*) hParallelOperation; if (op->object_in_use.load()) return ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE; op->object_in_use.store(true); get_arena().execute([&](){ op->group.run([=](){ op->errorCode = zeRTASBuilderBuildBody(aty,args, pScratchBuffer, scratchBufferSizeBytes, pRtasBuffer, rtasBufferSizeBytes, pBuildUserPtr, pBounds, pRtasBufferSizeBytes); }); }); return ZE_RESULT_EXP_RTAS_BUILD_DEFERRED; } /* ... 
otherwise we just execute inside task arena to avoid spawning of TBB worker threads */ else { ze_result_t errorCode = ZE_RESULT_SUCCESS; get_arena().execute([&](){ errorCode = zeRTASBuilderBuildBody(aty,args, pScratchBuffer, scratchBufferSizeBytes, pRtasBuffer, rtasBufferSizeBytes, pBuildUserPtr, pBounds, pRtasBufferSizeBytes); }); return errorCode; } } ze_result_t zeRTASParallelOperationCreateImpl(API_TY aty, ze_driver_handle_t hDriver, ze_rtas_parallel_operation_exp_handle_t* phParallelOperation) { /* input validation */ VALIDATE(aty,hDriver); VALIDATE_PTR(aty,phParallelOperation); /* create parallel operation object */ *phParallelOperation = (ze_rtas_parallel_operation_exp_handle_t) new ze_rtas_parallel_operation_t(); return ZE_RESULT_SUCCESS; } ze_result_t zeRTASParallelOperationDestroyImpl(API_TY aty, ze_rtas_parallel_operation_exp_handle_t hParallelOperation ) { /* input validation */ VALIDATE(aty,hParallelOperation); /* delete parallel operation */ delete (ze_rtas_parallel_operation_t*) hParallelOperation; return ZE_RESULT_SUCCESS; } ze_result_t zeRTASParallelOperationGetPropertiesImpl(API_TY aty, ze_rtas_parallel_operation_exp_handle_t hParallelOperation, ze_rtas_parallel_operation_exp_properties_t* pProperties ) { /* input validation */ VALIDATE(aty,hParallelOperation); VALIDATE(aty,pProperties); ze_rtas_parallel_operation_t* op = (ze_rtas_parallel_operation_t*) hParallelOperation; if (!op->object_in_use.load()) return ZE_RESULT_ERROR_INVALID_ARGUMENT; /* return properties */ pProperties->flags = 0; pProperties->maxConcurrency = tbb::this_task_arena::max_concurrency(); return ZE_RESULT_SUCCESS; } ze_result_t zeRTASParallelOperationJoinImpl(API_TY aty, ze_rtas_parallel_operation_exp_handle_t hParallelOperation) { /* check for valid handle */ VALIDATE(aty,hParallelOperation); ze_rtas_parallel_operation_t* op = (ze_rtas_parallel_operation_t*) hParallelOperation; get_arena().execute([&](){ op->group.wait(); }); op->object_in_use.store(false); // this is 
slighty too early return op->errorCode; } /* entry points for EXT API */ RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderCreateExtImpl(ze_driver_handle_t hDriver, const ze_rtas_builder_ext_desc_t *pDescriptor, ze_rtas_builder_ext_handle_t *phBuilder) { return zeRTASBuilderCreateImpl(EXT_API, hDriver, (ze_rtas_builder_exp_desc_t*) pDescriptor, (ze_rtas_builder_exp_handle_t*) phBuilder); } RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderDestroyExtImpl(ze_rtas_builder_ext_handle_t hBuilder) { return zeRTASBuilderDestroyImpl(EXT_API, (ze_rtas_builder_exp_handle_t) hBuilder); } RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeDriverRTASFormatCompatibilityCheckExtImpl( ze_driver_handle_t hDriver, const ze_rtas_format_ext_t accelFormat, const ze_rtas_format_ext_t otherAccelFormat ) { return zeDriverRTASFormatCompatibilityCheckImpl( EXT_API, hDriver, (ze_rtas_format_exp_t) accelFormat, (ze_rtas_format_exp_t) otherAccelFormat ); } RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderGetBuildPropertiesExtImpl(ze_rtas_builder_ext_handle_t hBuilder, const ze_rtas_builder_build_op_ext_desc_t* args, ze_rtas_builder_ext_properties_t* pProp) { return zeRTASBuilderGetBuildPropertiesImpl(EXT_API, (ze_rtas_builder_exp_handle_t) hBuilder, (const ze_rtas_builder_build_op_exp_desc_t*) args, (ze_rtas_builder_exp_properties_t*) pProp); } RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderBuildExtImpl(ze_rtas_builder_ext_handle_t hBuilder, const ze_rtas_builder_build_op_ext_desc_t* args, void *pScratchBuffer, size_t scratchBufferSizeBytes, void *pRtasBuffer, size_t rtasBufferSizeBytes, ze_rtas_parallel_operation_ext_handle_t hParallelOperation, void *pBuildUserPtr, ze_rtas_aabb_ext_t *pBounds, size_t *pRtasBufferSizeBytes) { return zeRTASBuilderBuildImpl(EXT_API, (ze_rtas_builder_exp_handle_t) hBuilder, (const ze_rtas_builder_build_op_exp_desc_t*) args, pScratchBuffer, scratchBufferSizeBytes, pRtasBuffer, rtasBufferSizeBytes, (ze_rtas_parallel_operation_exp_handle_t) 
hParallelOperation, pBuildUserPtr, (ze_rtas_aabb_exp_t*) pBounds, pRtasBufferSizeBytes); } RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationCreateExtImpl(ze_driver_handle_t hDriver, ze_rtas_parallel_operation_ext_handle_t* phParallelOperation) { return zeRTASParallelOperationCreateImpl(EXT_API, hDriver, (ze_rtas_parallel_operation_exp_handle_t*) phParallelOperation); } RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationDestroyExtImpl( ze_rtas_parallel_operation_ext_handle_t hParallelOperation ) { return zeRTASParallelOperationDestroyImpl( EXT_API, (ze_rtas_parallel_operation_exp_handle_t) hParallelOperation ); } RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationGetPropertiesExtImpl( ze_rtas_parallel_operation_ext_handle_t hParallelOperation, ze_rtas_parallel_operation_ext_properties_t* pProperties ) { return zeRTASParallelOperationGetPropertiesImpl( EXT_API, (ze_rtas_parallel_operation_exp_handle_t) hParallelOperation, (ze_rtas_parallel_operation_exp_properties_t*) pProperties ); } RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationJoinExtImpl( ze_rtas_parallel_operation_ext_handle_t hParallelOperation) { return zeRTASParallelOperationJoinImpl( EXT_API, (ze_rtas_parallel_operation_exp_handle_t) hParallelOperation); } /* entry points for EXP API */ RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderCreateExpImpl(ze_driver_handle_t hDriver, const ze_rtas_builder_exp_desc_t *pDescriptor, ze_rtas_builder_exp_handle_t *phBuilder) { return zeRTASBuilderCreateImpl(EXP_API, hDriver, pDescriptor, phBuilder); } RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderDestroyExpImpl(ze_rtas_builder_exp_handle_t hBuilder) { return zeRTASBuilderDestroyImpl(EXP_API, hBuilder); } RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeDriverRTASFormatCompatibilityCheckExpImpl( ze_driver_handle_t hDriver, const ze_rtas_format_exp_t accelFormat, const ze_rtas_format_exp_t otherAccelFormat ) { return zeDriverRTASFormatCompatibilityCheckImpl( 
EXP_API, hDriver, accelFormat, otherAccelFormat ); } RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderGetBuildPropertiesExpImpl(ze_rtas_builder_exp_handle_t hBuilder, const ze_rtas_builder_build_op_exp_desc_t* args, ze_rtas_builder_exp_properties_t* pProp) { return zeRTASBuilderGetBuildPropertiesImpl(EXP_API, hBuilder, args, pProp); } RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderBuildExpImpl(ze_rtas_builder_exp_handle_t hBuilder, const ze_rtas_builder_build_op_exp_desc_t* args, void *pScratchBuffer, size_t scratchBufferSizeBytes, void *pRtasBuffer, size_t rtasBufferSizeBytes, ze_rtas_parallel_operation_exp_handle_t hParallelOperation, void *pBuildUserPtr, ze_rtas_aabb_exp_t *pBounds, size_t *pRtasBufferSizeBytes) { return zeRTASBuilderBuildImpl(EXP_API, hBuilder, args, pScratchBuffer, scratchBufferSizeBytes, pRtasBuffer, rtasBufferSizeBytes, hParallelOperation, pBuildUserPtr, pBounds, pRtasBufferSizeBytes); } RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationCreateExpImpl(ze_driver_handle_t hDriver, ze_rtas_parallel_operation_exp_handle_t* phParallelOperation) { return zeRTASParallelOperationCreateImpl(EXP_API, hDriver, phParallelOperation); } RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationDestroyExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation ) { return zeRTASParallelOperationDestroyImpl( EXP_API, hParallelOperation ); } RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationGetPropertiesExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation, ze_rtas_parallel_operation_exp_properties_t* pProperties ) { return zeRTASParallelOperationGetPropertiesImpl( EXP_API, hParallelOperation, pProperties ); } RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationJoinExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation) { return zeRTASParallelOperationJoinImpl( EXP_API, hParallelOperation); } } 
level-zero-raytracing-support-1.2.3/rtbuild/rtbuild.h000066400000000000000000000136601514453371700227570ustar00rootroot00000000000000// Copyright 2009-2022 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include #include #include "level_zero/ze_wrapper.h" #if defined(__cplusplus) # define RTHWIF_API_EXTERN_C extern "C" #else # define RTHWIF_API_EXTERN_C #endif #if defined(_WIN32) #if defined(EMBREE_RTHWIF_STATIC_LIB) # define RTHWIF_API_IMPORT RTHWIF_API_EXTERN_C # define RTHWIF_API_EXPORT RTHWIF_API_EXTERN_C #else # define RTHWIF_API_IMPORT RTHWIF_API_EXTERN_C __declspec(dllimport) # define RTHWIF_API_EXPORT RTHWIF_API_EXTERN_C __declspec(dllexport) #endif #else # define RTHWIF_API_IMPORT RTHWIF_API_EXTERN_C # define RTHWIF_API_EXPORT RTHWIF_API_EXTERN_C __attribute__ ((visibility ("default"))) #endif typedef enum _ze_raytracing_accel_format_internal_t { ZE_RTAS_DEVICE_FORMAT_EXP_INVALID = 0, // invalid acceleration structure format ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_1 = 1, // acceleration structure format version 1 ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_2 = 2, // acceleration structure format version 2 ZE_RTAS_DEVICE_FORMAT_EXP_VERSION_MAX = 2 } ze_raytracing_accel_format_internal_t; /* EXT version of API */ RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderCreateExtImpl(ze_driver_handle_t hDriver, const ze_rtas_builder_ext_desc_t *pDescriptor, ze_rtas_builder_ext_handle_t *phBuilder); RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderDestroyExtImpl(ze_rtas_builder_ext_handle_t hBuilder); RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeDriverRTASFormatCompatibilityCheckExtImpl( ze_driver_handle_t hDriver, const ze_rtas_format_ext_t accelFormat, const ze_rtas_format_ext_t otherAccelFormat); RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderGetBuildPropertiesExtImpl(ze_rtas_builder_ext_handle_t hBuilder, const ze_rtas_builder_build_op_ext_desc_t* args, ze_rtas_builder_ext_properties_t* pProp); RTHWIF_API_EXPORT ze_result_t ZE_APICALL 
zeRTASBuilderBuildExtImpl(ze_rtas_builder_ext_handle_t hBuilder, const ze_rtas_builder_build_op_ext_desc_t* args, void *pScratchBuffer, size_t scratchBufferSizeBytes, void *pRtasBuffer, size_t rtasBufferSizeBytes, ze_rtas_parallel_operation_ext_handle_t hParallelOperation, void *pBuildUserPtr, ze_rtas_aabb_ext_t *pBounds, size_t *pRtasBufferSizeBytes); RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationCreateExtImpl(ze_driver_handle_t hDriver, ze_rtas_parallel_operation_ext_handle_t* phParallelOperation); RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationDestroyExtImpl( ze_rtas_parallel_operation_ext_handle_t hParallelOperation ); RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationGetPropertiesExtImpl( ze_rtas_parallel_operation_ext_handle_t hParallelOperation, ze_rtas_parallel_operation_ext_properties_t* pProperties ); RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationJoinExtImpl( ze_rtas_parallel_operation_ext_handle_t hParallelOperation); /* EXP version of API */ RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderCreateExpImpl(ze_driver_handle_t hDriver, const ze_rtas_builder_exp_desc_t *pDescriptor, ze_rtas_builder_exp_handle_t *phBuilder); RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderDestroyExpImpl(ze_rtas_builder_exp_handle_t hBuilder); RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeDriverRTASFormatCompatibilityCheckExpImpl( ze_driver_handle_t hDriver, const ze_rtas_format_exp_t accelFormat, const ze_rtas_format_exp_t otherAccelFormat); RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderGetBuildPropertiesExpImpl(ze_rtas_builder_exp_handle_t hBuilder, const ze_rtas_builder_build_op_exp_desc_t* args, ze_rtas_builder_exp_properties_t* pProp); RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASBuilderBuildExpImpl(ze_rtas_builder_exp_handle_t hBuilder, const ze_rtas_builder_build_op_exp_desc_t* args, void *pScratchBuffer, size_t scratchBufferSizeBytes, void *pRtasBuffer, size_t rtasBufferSizeBytes, 
ze_rtas_parallel_operation_exp_handle_t hParallelOperation, void *pBuildUserPtr, ze_rtas_aabb_exp_t *pBounds, size_t *pRtasBufferSizeBytes); RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationCreateExpImpl(ze_driver_handle_t hDriver, ze_rtas_parallel_operation_exp_handle_t* phParallelOperation); RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationDestroyExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation ); RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationGetPropertiesExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation, ze_rtas_parallel_operation_exp_properties_t* pProperties ); RTHWIF_API_EXPORT ze_result_t ZE_APICALL zeRTASParallelOperationJoinExpImpl( ze_rtas_parallel_operation_exp_handle_t hParallelOperation); level-zero-raytracing-support-1.2.3/rtbuild/simd/000077500000000000000000000000001514453371700220675ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/rtbuild/simd/CMakeLists.txt000066400000000000000000000007171514453371700246340ustar00rootroot00000000000000## Copyright 2009-2021 Intel Corporation ## SPDX-License-Identifier: Apache-2.0 ADD_LIBRARY(simd STATIC sse.cpp) SET_PROPERTY(TARGET simd PROPERTY FOLDER common) SET_PROPERTY(TARGET simd APPEND PROPERTY COMPILE_FLAGS " ${FLAGS_LOWEST}") IF (EMBREE_STATIC_LIB) INSTALL(TARGETS simd EXPORT simd-targets ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT devel) INSTALL(EXPORT simd-targets DESTINATION "${EMBREE_CMAKEEXPORT_DIR}" COMPONENT devel) ENDIF() level-zero-raytracing-support-1.2.3/rtbuild/simd/simd.h000066400000000000000000000006261514453371700232000ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "../math/emath.h" /* include SSE wrapper classes */ #include "sse.h" namespace embree { template __forceinline vbool isfinite(const vfloat& v) { return (v >= vfloat(-std::numeric_limits::max())) & (v <= vfloat( 
std::numeric_limits::max())); } } level-zero-raytracing-support-1.2.3/rtbuild/simd/sse.cpp000066400000000000000000000022731514453371700233710ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "sse.h" namespace embree { const __m128 mm_lookupmask_ps[16] = { _mm_castsi128_ps(_mm_set_epi32( 0, 0, 0, 0)), _mm_castsi128_ps(_mm_set_epi32( 0, 0, 0,-1)), _mm_castsi128_ps(_mm_set_epi32( 0, 0,-1, 0)), _mm_castsi128_ps(_mm_set_epi32( 0, 0,-1,-1)), _mm_castsi128_ps(_mm_set_epi32( 0,-1, 0, 0)), _mm_castsi128_ps(_mm_set_epi32( 0,-1, 0,-1)), _mm_castsi128_ps(_mm_set_epi32( 0,-1,-1, 0)), _mm_castsi128_ps(_mm_set_epi32( 0,-1,-1,-1)), _mm_castsi128_ps(_mm_set_epi32(-1, 0, 0, 0)), _mm_castsi128_ps(_mm_set_epi32(-1, 0, 0,-1)), _mm_castsi128_ps(_mm_set_epi32(-1, 0,-1, 0)), _mm_castsi128_ps(_mm_set_epi32(-1, 0,-1,-1)), _mm_castsi128_ps(_mm_set_epi32(-1,-1, 0, 0)), _mm_castsi128_ps(_mm_set_epi32(-1,-1, 0,-1)), _mm_castsi128_ps(_mm_set_epi32(-1,-1,-1, 0)), _mm_castsi128_ps(_mm_set_epi32(-1,-1,-1,-1)) }; const __m128d mm_lookupmask_pd[4] = { _mm_castsi128_pd(_mm_set_epi32( 0, 0, 0, 0)), _mm_castsi128_pd(_mm_set_epi32( 0, 0,-1,-1)), _mm_castsi128_pd(_mm_set_epi32(-1,-1, 0, 0)), _mm_castsi128_pd(_mm_set_epi32(-1,-1,-1,-1)) }; } level-zero-raytracing-support-1.2.3/rtbuild/simd/sse.h000066400000000000000000000013371514453371700230360ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "../sys/platform.h" #include "../sys/intrinsics.h" #include "../sys/alloc.h" #include "../math/constants.h" #include "varying.h" namespace embree { #if defined(__SSE4_1__) __forceinline __m128 blendv_ps(__m128 f, __m128 t, __m128 mask) { return _mm_blendv_ps(f,t,mask); } #else __forceinline __m128 blendv_ps(__m128 f, __m128 t, __m128 mask) { return _mm_or_ps(_mm_and_ps(mask, t), _mm_andnot_ps(mask, f)); } #endif extern const __m128 mm_lookupmask_ps[16]; extern const 
__m128d mm_lookupmask_pd[4]; } #include "vboolf4_sse2.h" #include "vint4_sse2.h" #include "vuint4_sse2.h" #include "vfloat4_sse2.h" level-zero-raytracing-support-1.2.3/rtbuild/simd/varying.h000066400000000000000000000071021514453371700237170ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "../sys/platform.h" namespace embree { /* Varying numeric types */ template struct vfloat_impl { union { float f[N]; int i[N]; }; __forceinline const float& operator [](size_t index) const { assert(index < N); return f[index]; } __forceinline float& operator [](size_t index) { assert(index < N); return f[index]; } }; template struct vdouble_impl { union { double f[N]; long long i[N]; }; __forceinline const double& operator [](size_t index) const { assert(index < N); return f[index]; } __forceinline double& operator [](size_t index) { assert(index < N); return f[index]; } }; template struct vint_impl { int i[N]; __forceinline const int& operator [](size_t index) const { assert(index < N); return i[index]; } __forceinline int& operator [](size_t index) { assert(index < N); return i[index]; } }; template struct vuint_impl { unsigned int i[N]; __forceinline const unsigned int& operator [](size_t index) const { assert(index < N); return i[index]; } __forceinline unsigned int& operator [](size_t index) { assert(index < N); return i[index]; } }; template struct vllong_impl { long long i[N]; __forceinline const long long& operator [](size_t index) const { assert(index < N); return i[index]; } __forceinline long long& operator [](size_t index) { assert(index < N); return i[index]; } }; /* Varying bool types */ template struct vboolf_impl { int i[N]; }; // for float/int template struct vboold_impl { long long i[N]; }; // for double/long long /* Varying size constants */ const int VSIZEX = 4; const int VSIZEL = 4; template struct vtypes { using vbool = vboolf_impl; using vboolf = vboolf_impl; using vboold = 
vboold_impl; using vint = vint_impl; using vuint = vuint_impl; using vllong = vllong_impl; using vfloat = vfloat_impl; using vdouble = vdouble_impl; }; template<> struct vtypes<1> { using vbool = bool; using vboolf = bool; using vboold = bool; using vint = int; using vuint = unsigned int; using vllong = long long; using vfloat = float; using vdouble = double; }; /* Aliases to default types */ template using vbool = typename vtypes::vbool; template using vboolf = typename vtypes::vboolf; template using vboold = typename vtypes::vboold; template using vint = typename vtypes::vint; template using vuint = typename vtypes::vuint; template using vllong = typename vtypes::vllong; template using vreal = typename vtypes::vfloat; template using vfloat = typename vtypes::vfloat; template using vdouble = typename vtypes::vdouble; /* 4-wide shortcuts */ typedef vfloat<4> vfloat4; typedef vdouble<4> vdouble4; typedef vreal<4> vreal4; typedef vint<4> vint4; typedef vuint<4> vuint4; typedef vllong<4> vllong4; typedef vbool<4> vbool4; typedef vboolf<4> vboolf4; typedef vboold<4> vboold4; /* Default shortcuts */ typedef vfloat vfloatx; typedef vdouble vdoublex; typedef vreal vrealx; typedef vint vintx; typedef vuint vuintx; typedef vllong vllongx; typedef vbool vboolx; typedef vboolf vboolfx; typedef vboold vbooldx; } level-zero-raytracing-support-1.2.3/rtbuild/simd/vboolf4_sse2.h000066400000000000000000000206471514453371700245600ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #define vboolf vboolf_impl #define vboold vboold_impl #define vint vint_impl #define vuint vuint_impl #define vllong vllong_impl #define vfloat vfloat_impl #define vdouble vdouble_impl namespace embree { /* 4-wide SSE bool type */ template<> struct vboolf<4> { ALIGNED_STRUCT_(16); typedef vboolf4 Bool; typedef vint4 Int; typedef vfloat4 Float; enum { size = 4 }; // number of SIMD elements union { __m128 v; int i[4]; }; // data 
//////////////////////////////////////////////////////////////////////////////// /// Constructors, Assignment & Cast Operators //////////////////////////////////////////////////////////////////////////////// __forceinline vboolf() {} __forceinline vboolf(const vboolf4& other) { v = other.v; } __forceinline vboolf4& operator =(const vboolf4& other) { v = other.v; return *this; } __forceinline vboolf(__m128 input) : v(input) {} __forceinline operator const __m128&() const { return v; } __forceinline operator const __m128i() const { return _mm_castps_si128(v); } __forceinline operator const __m128d() const { return _mm_castps_pd(v); } __forceinline vboolf(bool a) : v(mm_lookupmask_ps[(size_t(a) << 3) | (size_t(a) << 2) | (size_t(a) << 1) | size_t(a)]) {} __forceinline vboolf(bool a, bool b) : v(mm_lookupmask_ps[(size_t(b) << 3) | (size_t(a) << 2) | (size_t(b) << 1) | size_t(a)]) {} __forceinline vboolf(bool a, bool b, bool c, bool d) : v(mm_lookupmask_ps[(size_t(d) << 3) | (size_t(c) << 2) | (size_t(b) << 1) | size_t(a)]) {} __forceinline vboolf(int mask) { assert(mask >= 0 && mask < 16); v = mm_lookupmask_ps[mask]; } __forceinline vboolf(unsigned int mask) { assert(mask < 16); v = mm_lookupmask_ps[mask]; } /* return int32 mask */ __forceinline __m128i mask32() const { return _mm_castps_si128(v); } //////////////////////////////////////////////////////////////////////////////// /// Constants //////////////////////////////////////////////////////////////////////////////// __forceinline vboolf(FalseTy) : v(_mm_setzero_ps()) {} __forceinline vboolf(TrueTy) : v(_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()))) {} //////////////////////////////////////////////////////////////////////////////// /// Array Access //////////////////////////////////////////////////////////////////////////////// __forceinline bool operator [](size_t index) const { assert(index < 4); return (_mm_movemask_ps(v) >> index) & 1; } __forceinline int& operator [](size_t 
index) { assert(index < 4); return i[index]; } }; //////////////////////////////////////////////////////////////////////////////// /// Unary Operators //////////////////////////////////////////////////////////////////////////////// __forceinline vboolf4 operator !(const vboolf4& a) { return _mm_xor_ps(a, vboolf4(embree::True)); } //////////////////////////////////////////////////////////////////////////////// /// Binary Operators //////////////////////////////////////////////////////////////////////////////// __forceinline vboolf4 operator &(const vboolf4& a, const vboolf4& b) { return _mm_and_ps(a, b); } __forceinline vboolf4 operator |(const vboolf4& a, const vboolf4& b) { return _mm_or_ps (a, b); } __forceinline vboolf4 operator ^(const vboolf4& a, const vboolf4& b) { return _mm_xor_ps(a, b); } __forceinline vboolf4 andn(const vboolf4& a, const vboolf4& b) { return _mm_andnot_ps(b, a); } //////////////////////////////////////////////////////////////////////////////// /// Assignment Operators //////////////////////////////////////////////////////////////////////////////// __forceinline vboolf4& operator &=(vboolf4& a, const vboolf4& b) { return a = a & b; } __forceinline vboolf4& operator |=(vboolf4& a, const vboolf4& b) { return a = a | b; } __forceinline vboolf4& operator ^=(vboolf4& a, const vboolf4& b) { return a = a ^ b; } //////////////////////////////////////////////////////////////////////////////// /// Comparison Operators + Select //////////////////////////////////////////////////////////////////////////////// __forceinline vboolf4 operator !=(const vboolf4& a, const vboolf4& b) { return _mm_xor_ps(a, b); } __forceinline vboolf4 operator ==(const vboolf4& a, const vboolf4& b) { return _mm_castsi128_ps(_mm_cmpeq_epi32(a, b)); } __forceinline vboolf4 select(const vboolf4& m, const vboolf4& t, const vboolf4& f) { #if defined(__SSE4_1__) return _mm_blendv_ps(f, t, m); #else return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f)); #endif } 
//////////////////////////////////////////////////////////////////////////////// /// Movement/Shifting/Shuffling Functions //////////////////////////////////////////////////////////////////////////////// __forceinline vboolf4 unpacklo(const vboolf4& a, const vboolf4& b) { return _mm_unpacklo_ps(a, b); } __forceinline vboolf4 unpackhi(const vboolf4& a, const vboolf4& b) { return _mm_unpackhi_ps(a, b); } template __forceinline vboolf4 shuffle(const vboolf4& v) { return _mm_castsi128_ps(_mm_shuffle_epi32(v, _MM_SHUFFLE(i3, i2, i1, i0))); } template __forceinline vboolf4 shuffle(const vboolf4& a, const vboolf4& b) { return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)); } template __forceinline vboolf4 shuffle(const vboolf4& v) { return shuffle(v); } #if defined(__SSE3__) template<> __forceinline vboolf4 shuffle<0, 0, 2, 2>(const vboolf4& v) { return _mm_moveldup_ps(v); } template<> __forceinline vboolf4 shuffle<1, 1, 3, 3>(const vboolf4& v) { return _mm_movehdup_ps(v); } template<> __forceinline vboolf4 shuffle<0, 1, 0, 1>(const vboolf4& v) { return _mm_castpd_ps(_mm_movedup_pd(v)); } #endif #if defined(__SSE4_1__) template __forceinline vboolf4 insert(const vboolf4& a, const vboolf4& b) { return _mm_insert_ps(a, b, (dst << 4) | (src << 6) | clr); } template __forceinline vboolf4 insert(const vboolf4& a, const vboolf4& b) { return insert(a, b); } template __forceinline vboolf4 insert(const vboolf4& a, const bool b) { return insert(a, vboolf4(b)); } #endif //////////////////////////////////////////////////////////////////////////////// /// Reduction Operations //////////////////////////////////////////////////////////////////////////////// __forceinline bool reduce_and(const vboolf4& a) { return _mm_movemask_ps(a) == 0xf; } __forceinline bool reduce_or (const vboolf4& a) { return _mm_movemask_ps(a) != 0x0; } __forceinline bool all (const vboolf4& b) { return _mm_movemask_ps(b) == 0xf; } __forceinline bool any (const vboolf4& b) { return _mm_movemask_ps(b) != 0x0; } 
__forceinline bool none(const vboolf4& b) { return _mm_movemask_ps(b) == 0x0; } __forceinline bool all (const vboolf4& valid, const vboolf4& b) { return all((!valid) | b); } __forceinline bool any (const vboolf4& valid, const vboolf4& b) { return any(valid & b); } __forceinline bool none(const vboolf4& valid, const vboolf4& b) { return none(valid & b); } __forceinline size_t movemask(const vboolf4& a) { return _mm_movemask_ps(a); } #if defined(__SSE4_2__) __forceinline size_t popcnt(const vboolf4& a) { return popcnt((size_t)_mm_movemask_ps(a)); } #else __forceinline size_t popcnt(const vboolf4& a) { return bool(a[0])+bool(a[1])+bool(a[2])+bool(a[3]); } #endif //////////////////////////////////////////////////////////////////////////////// /// Get/Set Functions //////////////////////////////////////////////////////////////////////////////// __forceinline bool get(const vboolf4& a, size_t index) { return a[index]; } __forceinline void set(vboolf4& a, size_t index) { a[index] = -1; } __forceinline void clear(vboolf4& a, size_t index) { a[index] = 0; } //////////////////////////////////////////////////////////////////////////////// /// Output Operators //////////////////////////////////////////////////////////////////////////////// __forceinline embree_ostream operator <<(embree_ostream cout, const vboolf4& a) { return cout << "<" << a[0] << ", " << a[1] << ", " << a[2] << ", " << a[3] << ">"; } } #undef vboolf #undef vboold #undef vint #undef vuint #undef vllong #undef vfloat #undef vdouble level-zero-raytracing-support-1.2.3/rtbuild/simd/vfloat4_sse2.h000066400000000000000000000512461514453371700245630ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #define vboolf vboolf_impl #define vboold vboold_impl #define vint vint_impl #define vuint vuint_impl #define vllong vllong_impl #define vfloat vfloat_impl #define vdouble vdouble_impl namespace embree { /* 4-wide SSE float type */ template<> struct 
vfloat<4> { ALIGNED_STRUCT_(16); typedef vboolf4 Bool; typedef vint4 Int; typedef vfloat4 Float; enum { size = 4 }; // number of SIMD elements union { __m128 v; float f[4]; int i[4]; }; // data //////////////////////////////////////////////////////////////////////////////// /// Constructors, Assignment & Cast Operators //////////////////////////////////////////////////////////////////////////////// __forceinline vfloat() {} __forceinline vfloat(const vfloat4& other) { v = other.v; } //__forceinline vfloat(const vfloat4& other) = default; __forceinline vfloat4& operator =(const vfloat4& other) { v = other.v; return *this; } __forceinline vfloat(__m128 a) : v(a) {} __forceinline operator const __m128&() const { return v; } __forceinline operator __m128&() { return v; } __forceinline vfloat(float a) : v(_mm_set1_ps(a)) {} __forceinline vfloat(float a, float b, float c, float d) : v(_mm_set_ps(d, c, b, a)) {} __forceinline explicit vfloat(const vint4& a) : v(_mm_cvtepi32_ps(a)) {} __forceinline explicit vfloat(const vuint4& x) { const __m128i a = _mm_and_si128(x,_mm_set1_epi32(0x7FFFFFFF)); const __m128i b = _mm_and_si128(_mm_srai_epi32(x,31),_mm_set1_epi32(0x4F000000)); //0x4F000000 = 2^31 const __m128 af = _mm_cvtepi32_ps(a); const __m128 bf = _mm_castsi128_ps(b); v = _mm_add_ps(af,bf); } //////////////////////////////////////////////////////////////////////////////// /// Constants //////////////////////////////////////////////////////////////////////////////// __forceinline vfloat(ZeroTy) : v(_mm_setzero_ps()) {} __forceinline vfloat(OneTy) : v(_mm_set1_ps(1.0f)) {} __forceinline vfloat(PosInfTy) : v(_mm_set1_ps(pos_inf)) {} __forceinline vfloat(NegInfTy) : v(_mm_set1_ps(neg_inf)) {} __forceinline vfloat(StepTy) : v(_mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f)) {} __forceinline vfloat(NaNTy) : v(_mm_set1_ps(nan)) {} __forceinline vfloat(UndefinedTy) : v(_mm_undefined_ps()) {} //////////////////////////////////////////////////////////////////////////////// /// Loads and Stores 
//////////////////////////////////////////////////////////////////////////////// static __forceinline vfloat4 load (const void* a) { return _mm_load_ps((float*)a); } static __forceinline vfloat4 loadu(const void* a) { return _mm_loadu_ps((float*)a); } static __forceinline void store (void* ptr, const vfloat4& v) { _mm_store_ps((float*)ptr,v); } static __forceinline void storeu(void* ptr, const vfloat4& v) { _mm_storeu_ps((float*)ptr,v); } static __forceinline vfloat4 load (const vboolf4& mask, const void* ptr) { return _mm_and_ps(_mm_load_ps ((float*)ptr),mask); } static __forceinline vfloat4 loadu(const vboolf4& mask, const void* ptr) { return _mm_and_ps(_mm_loadu_ps((float*)ptr),mask); } static __forceinline void store (const vboolf4& mask, void* ptr, const vfloat4& v) { store (ptr,select(mask,v,load (ptr))); } static __forceinline void storeu(const vboolf4& mask, void* ptr, const vfloat4& v) { storeu(ptr,select(mask,v,loadu(ptr))); } static __forceinline vfloat4 broadcast(const void* a) { return _mm_set1_ps(*(float*)a); } //////////////////////////////////////////////////////////////////////////////// /// Array Access //////////////////////////////////////////////////////////////////////////////// __forceinline const float& operator [](size_t index) const { assert(index < 4); return f[index]; } __forceinline float& operator [](size_t index) { assert(index < 4); return f[index]; } friend __forceinline vfloat4 select(const vboolf4& m, const vfloat4& t, const vfloat4& f) { #if defined(__SSE4_1__) return _mm_blendv_ps(f, t, m); #else return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f)); #endif } }; //////////////////////////////////////////////////////////////////////////////// /// Load/Store //////////////////////////////////////////////////////////////////////////////// template<> struct mem { static __forceinline vfloat4 load (const vboolf4& mask, const void* ptr) { return vfloat4::load (mask,ptr); } static __forceinline vfloat4 loadu(const vboolf4& mask, 
const void* ptr) { return vfloat4::loadu(mask,ptr); } static __forceinline void store (const vboolf4& mask, void* ptr, const vfloat4& v) { vfloat4::store (mask,ptr,v); } static __forceinline void storeu(const vboolf4& mask, void* ptr, const vfloat4& v) { vfloat4::storeu(mask,ptr,v); } }; //////////////////////////////////////////////////////////////////////////////// /// Unary Operators //////////////////////////////////////////////////////////////////////////////// __forceinline vfloat4 asFloat(const vint4& a) { return _mm_castsi128_ps(a); } __forceinline vint4 asInt (const vfloat4& a) { return _mm_castps_si128(a); } __forceinline vuint4 asUInt (const vfloat4& a) { return _mm_castps_si128(a); } __forceinline vint4 toInt (const vfloat4& a) { return vint4(a); } __forceinline vfloat4 toFloat(const vint4& a) { return vfloat4(a); } __forceinline vfloat4 operator +(const vfloat4& a) { return a; } __forceinline vfloat4 operator -(const vfloat4& a) { return _mm_xor_ps(a, _mm_castsi128_ps(_mm_set1_epi32(0x80000000))); } __forceinline vfloat4 abs(const vfloat4& a) { return _mm_and_ps(a, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff))); } __forceinline vfloat4 sign(const vfloat4& a) { return blendv_ps(vfloat4(one), -vfloat4(one), _mm_cmplt_ps(a, vfloat4(zero))); } __forceinline vfloat4 signmsk(const vfloat4& a) { return _mm_and_ps(a,_mm_castsi128_ps(_mm_set1_epi32(0x80000000))); } __forceinline vfloat4 rcp(const vfloat4& a) { const vfloat4 r = _mm_rcp_ps(a); return _mm_add_ps(r,_mm_mul_ps(r, _mm_sub_ps(vfloat4(1.0f), _mm_mul_ps(a, r)))); // computes r + r * (1 - a * r) } __forceinline vfloat4 sqr (const vfloat4& a) { return _mm_mul_ps(a,a); } __forceinline vfloat4 sqrt(const vfloat4& a) { return _mm_sqrt_ps(a); } __forceinline vfloat4 rsqrt(const vfloat4& a) { vfloat4 r = _mm_rsqrt_ps(a); r = _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f), r), _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r))); return r; } __forceinline vboolf4 isnan(const vfloat4& a) { 
const vfloat4 b = _mm_and_ps(a, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff))); return _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_castps_si128(b), _mm_set1_epi32(0x7f800000))); } //////////////////////////////////////////////////////////////////////////////// /// Binary Operators //////////////////////////////////////////////////////////////////////////////// __forceinline vfloat4 operator +(const vfloat4& a, const vfloat4& b) { return _mm_add_ps(a, b); } __forceinline vfloat4 operator +(const vfloat4& a, float b) { return a + vfloat4(b); } __forceinline vfloat4 operator +(float a, const vfloat4& b) { return vfloat4(a) + b; } __forceinline vfloat4 operator -(const vfloat4& a, const vfloat4& b) { return _mm_sub_ps(a, b); } __forceinline vfloat4 operator -(const vfloat4& a, float b) { return a - vfloat4(b); } __forceinline vfloat4 operator -(float a, const vfloat4& b) { return vfloat4(a) - b; } __forceinline vfloat4 operator *(const vfloat4& a, const vfloat4& b) { return _mm_mul_ps(a, b); } __forceinline vfloat4 operator *(const vfloat4& a, float b) { return a * vfloat4(b); } __forceinline vfloat4 operator *(float a, const vfloat4& b) { return vfloat4(a) * b; } __forceinline vfloat4 operator /(const vfloat4& a, const vfloat4& b) { return _mm_div_ps(a,b); } __forceinline vfloat4 operator /(const vfloat4& a, float b) { return a/vfloat4(b); } __forceinline vfloat4 operator /(float a, const vfloat4& b) { return vfloat4(a)/b; } __forceinline vfloat4 operator &(const vfloat4& a, const vfloat4& b) { return _mm_and_ps(a,b); } __forceinline vfloat4 operator |(const vfloat4& a, const vfloat4& b) { return _mm_or_ps(a,b); } __forceinline vfloat4 operator ^(const vfloat4& a, const vfloat4& b) { return _mm_xor_ps(a,b); } __forceinline vfloat4 operator ^(const vfloat4& a, const vint4& b) { return _mm_xor_ps(a,_mm_castsi128_ps(b)); } __forceinline vfloat4 min(const vfloat4& a, const vfloat4& b) { return _mm_min_ps(a,b); } __forceinline vfloat4 min(const vfloat4& a, float b) { return 
_mm_min_ps(a,vfloat4(b)); } __forceinline vfloat4 min(float a, const vfloat4& b) { return _mm_min_ps(vfloat4(a),b); } __forceinline vfloat4 max(const vfloat4& a, const vfloat4& b) { return _mm_max_ps(a,b); } __forceinline vfloat4 max(const vfloat4& a, float b) { return _mm_max_ps(a,vfloat4(b)); } __forceinline vfloat4 max(float a, const vfloat4& b) { return _mm_max_ps(vfloat4(a),b); } //////////////////////////////////////////////////////////////////////////////// /// Ternary Operators //////////////////////////////////////////////////////////////////////////////// __forceinline vfloat4 madd (const vfloat4& a, const vfloat4& b, const vfloat4& c) { return a*b+c; } __forceinline vfloat4 nmadd(const vfloat4& a, const vfloat4& b, const vfloat4& c) { return -a*b+c;} __forceinline vfloat4 nmsub(const vfloat4& a, const vfloat4& b, const vfloat4& c) { return -a*b-c; } __forceinline vfloat4 msub (const vfloat4& a, const vfloat4& b, const vfloat4& c) { return a*b-c; } //////////////////////////////////////////////////////////////////////////////// /// Assignment Operators //////////////////////////////////////////////////////////////////////////////// __forceinline vfloat4& operator +=(vfloat4& a, const vfloat4& b) { return a = a + b; } __forceinline vfloat4& operator +=(vfloat4& a, float b) { return a = a + b; } __forceinline vfloat4& operator -=(vfloat4& a, const vfloat4& b) { return a = a - b; } __forceinline vfloat4& operator -=(vfloat4& a, float b) { return a = a - b; } __forceinline vfloat4& operator *=(vfloat4& a, const vfloat4& b) { return a = a * b; } __forceinline vfloat4& operator *=(vfloat4& a, float b) { return a = a * b; } __forceinline vfloat4& operator /=(vfloat4& a, const vfloat4& b) { return a = a / b; } __forceinline vfloat4& operator /=(vfloat4& a, float b) { return a = a / b; } //////////////////////////////////////////////////////////////////////////////// /// Comparison Operators + Select 
//////////////////////////////////////////////////////////////////////////////// __forceinline vboolf4 operator ==(const vfloat4& a, const vfloat4& b) { return _mm_cmpeq_ps (a, b); } __forceinline vboolf4 operator !=(const vfloat4& a, const vfloat4& b) { return _mm_cmpneq_ps(a, b); } __forceinline vboolf4 operator < (const vfloat4& a, const vfloat4& b) { return _mm_cmplt_ps (a, b); } __forceinline vboolf4 operator >=(const vfloat4& a, const vfloat4& b) { return _mm_cmpnlt_ps(a, b); } __forceinline vboolf4 operator > (const vfloat4& a, const vfloat4& b) { return _mm_cmpnle_ps(a, b); } __forceinline vboolf4 operator <=(const vfloat4& a, const vfloat4& b) { return _mm_cmple_ps (a, b); } __forceinline vboolf4 operator ==(const vfloat4& a, float b) { return a == vfloat4(b); } __forceinline vboolf4 operator ==(float a, const vfloat4& b) { return vfloat4(a) == b; } __forceinline vboolf4 operator !=(const vfloat4& a, float b) { return a != vfloat4(b); } __forceinline vboolf4 operator !=(float a, const vfloat4& b) { return vfloat4(a) != b; } __forceinline vboolf4 operator < (const vfloat4& a, float b) { return a < vfloat4(b); } __forceinline vboolf4 operator < (float a, const vfloat4& b) { return vfloat4(a) < b; } __forceinline vboolf4 operator >=(const vfloat4& a, float b) { return a >= vfloat4(b); } __forceinline vboolf4 operator >=(float a, const vfloat4& b) { return vfloat4(a) >= b; } __forceinline vboolf4 operator > (const vfloat4& a, float b) { return a > vfloat4(b); } __forceinline vboolf4 operator > (float a, const vfloat4& b) { return vfloat4(a) > b; } __forceinline vboolf4 operator <=(const vfloat4& a, float b) { return a <= vfloat4(b); } __forceinline vboolf4 operator <=(float a, const vfloat4& b) { return vfloat4(a) <= b; } __forceinline vboolf4 eq(const vfloat4& a, const vfloat4& b) { return a == b; } __forceinline vboolf4 ne(const vfloat4& a, const vfloat4& b) { return a != b; } __forceinline vboolf4 lt(const vfloat4& a, const vfloat4& b) { return a < b; } 
__forceinline vboolf4 ge(const vfloat4& a, const vfloat4& b) { return a >= b; } __forceinline vboolf4 gt(const vfloat4& a, const vfloat4& b) { return a > b; } __forceinline vboolf4 le(const vfloat4& a, const vfloat4& b) { return a <= b; } __forceinline vboolf4 eq(const vboolf4& mask, const vfloat4& a, const vfloat4& b) { return mask & (a == b); } __forceinline vboolf4 ne(const vboolf4& mask, const vfloat4& a, const vfloat4& b) { return mask & (a != b); } __forceinline vboolf4 lt(const vboolf4& mask, const vfloat4& a, const vfloat4& b) { return mask & (a < b); } __forceinline vboolf4 ge(const vboolf4& mask, const vfloat4& a, const vfloat4& b) { return mask & (a >= b); } __forceinline vboolf4 gt(const vboolf4& mask, const vfloat4& a, const vfloat4& b) { return mask & (a > b); } __forceinline vboolf4 le(const vboolf4& mask, const vfloat4& a, const vfloat4& b) { return mask & (a <= b); } template __forceinline vfloat4 select(const vfloat4& t, const vfloat4& f) { #if defined(__SSE4_1__) return _mm_blend_ps(f, t, mask); #else return select(vboolf4(mask), t, f); #endif } __forceinline vfloat4 lerp(const vfloat4& a, const vfloat4& b, const vfloat4& t) { return madd(t,b-a,a); } __forceinline bool isvalid(const vfloat4& v) { return all((v > vfloat4(-FLT_LARGE)) & (v < vfloat4(+FLT_LARGE))); } __forceinline bool is_finite(const vfloat4& a) { return all((a >= vfloat4(-FLT_MAX)) & (a <= vfloat4(+FLT_MAX))); } __forceinline bool is_finite(const vboolf4& valid, const vfloat4& a) { return all(valid, (a >= vfloat4(-FLT_MAX)) & (a <= vfloat4(+FLT_MAX))); } //////////////////////////////////////////////////////////////////////////////// /// Rounding Functions //////////////////////////////////////////////////////////////////////////////// #if defined (__SSE4_1__) __forceinline vfloat4 floor(const vfloat4& a) { return _mm_round_ps(a, _MM_FROUND_TO_NEG_INF ); } __forceinline vfloat4 ceil (const vfloat4& a) { return _mm_round_ps(a, _MM_FROUND_TO_POS_INF ); } __forceinline vfloat4 
trunc(const vfloat4& a) { return _mm_round_ps(a, _MM_FROUND_TO_ZERO ); } __forceinline vfloat4 round(const vfloat4& a) { return _mm_round_ps(a, _MM_FROUND_TO_NEAREST_INT); } #else __forceinline vfloat4 floor(const vfloat4& a) { return vfloat4(floorf(a[0]),floorf(a[1]),floorf(a[2]),floorf(a[3])); } __forceinline vfloat4 ceil (const vfloat4& a) { return vfloat4(ceilf (a[0]),ceilf (a[1]),ceilf (a[2]),ceilf (a[3])); } __forceinline vfloat4 trunc(const vfloat4& a) { return vfloat4(truncf(a[0]),truncf(a[1]),truncf(a[2]),truncf(a[3])); } __forceinline vfloat4 round(const vfloat4& a) { return vfloat4(roundf(a[0]),roundf(a[1]),roundf(a[2]),roundf(a[3])); } #endif __forceinline vfloat4 frac(const vfloat4& a) { return a-floor(a); } __forceinline vint4 floori(const vfloat4& a) { #if defined(__SSE4_1__) return vint4(floor(a)); #else return vint4(a-vfloat4(0.5f)); #endif } //////////////////////////////////////////////////////////////////////////////// /// Movement/Shifting/Shuffling Functions //////////////////////////////////////////////////////////////////////////////// __forceinline vfloat4 unpacklo(const vfloat4& a, const vfloat4& b) { return _mm_unpacklo_ps(a, b); } __forceinline vfloat4 unpackhi(const vfloat4& a, const vfloat4& b) { return _mm_unpackhi_ps(a, b); } template __forceinline vfloat4 shuffle(const vfloat4& v) { return _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(v), _MM_SHUFFLE(i3, i2, i1, i0))); } template __forceinline vfloat4 shuffle(const vfloat4& a, const vfloat4& b) { return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)); } #if defined(__SSE3__) template<> __forceinline vfloat4 shuffle<0, 0, 2, 2>(const vfloat4& v) { return _mm_moveldup_ps(v); } template<> __forceinline vfloat4 shuffle<1, 1, 3, 3>(const vfloat4& v) { return _mm_movehdup_ps(v); } template<> __forceinline vfloat4 shuffle<0, 1, 0, 1>(const vfloat4& v) { return _mm_castpd_ps(_mm_movedup_pd(_mm_castps_pd(v))); } #endif template __forceinline vfloat4 shuffle(const vfloat4& v) { return 
shuffle(v); } template __forceinline float extract (const vfloat4& a) { return _mm_cvtss_f32(shuffle(a)); } template<> __forceinline float extract<0>(const vfloat4& a) { return _mm_cvtss_f32(a); } __forceinline float toScalar(const vfloat4& v) { return _mm_cvtss_f32(v); } //////////////////////////////////////////////////////////////////////////////// /// Transpose //////////////////////////////////////////////////////////////////////////////// __forceinline void transpose(const vfloat4& r0, const vfloat4& r1, const vfloat4& r2, const vfloat4& r3, vfloat4& c0, vfloat4& c1, vfloat4& c2, vfloat4& c3) { vfloat4 l02 = unpacklo(r0,r2); vfloat4 h02 = unpackhi(r0,r2); vfloat4 l13 = unpacklo(r1,r3); vfloat4 h13 = unpackhi(r1,r3); c0 = unpacklo(l02,l13); c1 = unpackhi(l02,l13); c2 = unpacklo(h02,h13); c3 = unpackhi(h02,h13); } __forceinline void transpose(const vfloat4& r0, const vfloat4& r1, const vfloat4& r2, const vfloat4& r3, vfloat4& c0, vfloat4& c1, vfloat4& c2) { vfloat4 l02 = unpacklo(r0,r2); vfloat4 h02 = unpackhi(r0,r2); vfloat4 l13 = unpacklo(r1,r3); vfloat4 h13 = unpackhi(r1,r3); c0 = unpacklo(l02,l13); c1 = unpackhi(l02,l13); c2 = unpacklo(h02,h13); } //////////////////////////////////////////////////////////////////////////////// /// Reductions //////////////////////////////////////////////////////////////////////////////// __forceinline vfloat4 vreduce_min(const vfloat4& v) { vfloat4 h = min(shuffle<1,0,3,2>(v),v); return min(shuffle<2,3,0,1>(h),h); } __forceinline vfloat4 vreduce_max(const vfloat4& v) { vfloat4 h = max(shuffle<1,0,3,2>(v),v); return max(shuffle<2,3,0,1>(h),h); } __forceinline vfloat4 vreduce_add(const vfloat4& v) { vfloat4 h = shuffle<1,0,3,2>(v) + v ; return shuffle<2,3,0,1>(h) + h ; } __forceinline float reduce_min(const vfloat4& v) { return _mm_cvtss_f32(vreduce_min(v)); } __forceinline float reduce_max(const vfloat4& v) { return _mm_cvtss_f32(vreduce_max(v)); } __forceinline float reduce_add(const vfloat4& v) { return 
_mm_cvtss_f32(vreduce_add(v)); } __forceinline size_t select_min(const vboolf4& valid, const vfloat4& v) { const vfloat4 a = select(valid,v,vfloat4(pos_inf)); const vbool4 valid_min = valid & (a == vreduce_min(a)); return bsf(movemask(any(valid_min) ? valid_min : valid)); } __forceinline size_t select_max(const vboolf4& valid, const vfloat4& v) { const vfloat4 a = select(valid,v,vfloat4(neg_inf)); const vbool4 valid_max = valid & (a == vreduce_max(a)); return bsf(movemask(any(valid_max) ? valid_max : valid)); } //////////////////////////////////////////////////////////////////////////////// /// Euclidean Space Operators //////////////////////////////////////////////////////////////////////////////// __forceinline float dot(const vfloat4& a, const vfloat4& b) { return reduce_add(a*b); } __forceinline vfloat4 cross(const vfloat4& a, const vfloat4& b) { const vfloat4 a0 = a; const vfloat4 b0 = shuffle<1,2,0,3>(b); const vfloat4 a1 = shuffle<1,2,0,3>(a); const vfloat4 b1 = b; return shuffle<1,2,0,3>(msub(a0,b0,a1*b1)); } //////////////////////////////////////////////////////////////////////////////// /// Output Operators //////////////////////////////////////////////////////////////////////////////// __forceinline embree_ostream operator <<(embree_ostream cout, const vfloat4& a) { return cout << "<" << a[0] << ", " << a[1] << ", " << a[2] << ", " << a[3] << ">"; } } #undef vboolf #undef vboold #undef vint #undef vuint #undef vllong #undef vfloat #undef vdouble level-zero-raytracing-support-1.2.3/rtbuild/simd/vint4_sse2.h000066400000000000000000000407121514453371700242440ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "../math/emath.h" #define vboolf vboolf_impl #define vboold vboold_impl #define vint vint_impl #define vuint vuint_impl #define vllong vllong_impl #define vfloat vfloat_impl #define vdouble vdouble_impl namespace embree { /* 4-wide SSE integer type */ template<> struct 
vint<4> { ALIGNED_STRUCT_(16); typedef vboolf4 Bool; typedef vint4 Int; typedef vfloat4 Float; enum { size = 4 }; // number of SIMD elements union { __m128i v; int i[4]; }; // data //////////////////////////////////////////////////////////////////////////////// /// Constructors, Assignment & Cast Operators //////////////////////////////////////////////////////////////////////////////// __forceinline vint() {} __forceinline vint(const vint4& a) { v = a.v; } __forceinline vint4& operator =(const vint4& a) { v = a.v; return *this; } __forceinline vint(__m128i a) : v(a) {} __forceinline operator const __m128i&() const { return v; } __forceinline operator __m128i&() { return v; } __forceinline vint(int a) : v(_mm_set1_epi32(a)) {} __forceinline vint(int a, int b, int c, int d) : v(_mm_set_epi32(d, c, b, a)) {} __forceinline explicit vint(__m128 a) : v(_mm_cvtps_epi32(a)) {} __forceinline explicit vint(const vboolf4& a) : v(_mm_castps_si128((__m128)a)) {} __forceinline vint(long long a, long long b) : v(_mm_set_epi64x(b,a)) {} //////////////////////////////////////////////////////////////////////////////// /// Constants //////////////////////////////////////////////////////////////////////////////// __forceinline vint(ZeroTy) : v(_mm_setzero_si128()) {} __forceinline vint(OneTy) : v(_mm_set_epi32(1, 1, 1, 1)) {} __forceinline vint(PosInfTy) : v(_mm_set_epi32(pos_inf, pos_inf, pos_inf, pos_inf)) {} __forceinline vint(NegInfTy) : v(_mm_set_epi32(neg_inf, neg_inf, neg_inf, neg_inf)) {} __forceinline vint(StepTy) : v(_mm_set_epi32(3, 2, 1, 0)) {} __forceinline vint(ReverseStepTy) : v(_mm_set_epi32(0, 1, 2, 3)) {} __forceinline vint(TrueTy) { v = _mm_cmpeq_epi32(v,v); } __forceinline vint(UndefinedTy) : v(_mm_castps_si128(_mm_undefined_ps())) {} //////////////////////////////////////////////////////////////////////////////// /// Loads and Stores //////////////////////////////////////////////////////////////////////////////// static __forceinline vint4 load (const void* a) { 
return _mm_load_si128((__m128i*)a); } static __forceinline vint4 loadu(const void* a) { return _mm_loadu_si128((__m128i*)a); } static __forceinline void store (void* ptr, const vint4& v) { _mm_store_si128((__m128i*)ptr,v); } static __forceinline void storeu(void* ptr, const vint4& v) { _mm_storeu_si128((__m128i*)ptr,v); } static __forceinline vint4 load (const vbool4& mask, const void* a) { return _mm_and_si128(_mm_load_si128 ((__m128i*)a),mask); } static __forceinline vint4 loadu(const vbool4& mask, const void* a) { return _mm_and_si128(_mm_loadu_si128((__m128i*)a),mask); } static __forceinline void store (const vboolf4& mask, void* ptr, const vint4& i) { store (ptr,select(mask,i,load (ptr))); } static __forceinline void storeu(const vboolf4& mask, void* ptr, const vint4& i) { storeu(ptr,select(mask,i,loadu(ptr))); } //////////////////////////////////////////////////////////////////////////////// /// Array Access //////////////////////////////////////////////////////////////////////////////// __forceinline const int& operator [](size_t index) const { assert(index < 4); return i[index]; } __forceinline int& operator [](size_t index) { assert(index < 4); return i[index]; } friend __forceinline vint4 select(const vboolf4& m, const vint4& t, const vint4& f) { #if defined(__SSE4_1__) return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(t), m)); #else return _mm_or_si128(_mm_and_si128(m, t), _mm_andnot_si128(m, f)); #endif } }; //////////////////////////////////////////////////////////////////////////////// /// Unary Operators //////////////////////////////////////////////////////////////////////////////// __forceinline vboolf4 asBool(const vint4& a) { return _mm_castsi128_ps(a); } __forceinline vint4 operator +(const vint4& a) { return a; } __forceinline vint4 operator -(const vint4& a) { return _mm_sub_epi32(_mm_setzero_si128(), a); } #if defined(__SSSE3__) __forceinline vint4 abs(const vint4& a) { return _mm_abs_epi32(a); } #endif 
//////////////////////////////////////////////////////////////////////////////// /// Binary Operators //////////////////////////////////////////////////////////////////////////////// __forceinline vint4 operator +(const vint4& a, const vint4& b) { return _mm_add_epi32(a, b); } __forceinline vint4 operator +(const vint4& a, int b) { return a + vint4(b); } __forceinline vint4 operator +(int a, const vint4& b) { return vint4(a) + b; } __forceinline vint4 operator -(const vint4& a, const vint4& b) { return _mm_sub_epi32(a, b); } __forceinline vint4 operator -(const vint4& a, int b) { return a - vint4(b); } __forceinline vint4 operator -(int a, const vint4& b) { return vint4(a) - b; } #if defined(__SSE4_1__) __forceinline vint4 operator *(const vint4& a, const vint4& b) { return _mm_mullo_epi32(a, b); } #else __forceinline vint4 operator *(const vint4& a, const vint4& b) { return vint4(a[0]*b[0],a[1]*b[1],a[2]*b[2],a[3]*b[3]); } #endif __forceinline vint4 operator *(const vint4& a, int b) { return a * vint4(b); } __forceinline vint4 operator *(int a, const vint4& b) { return vint4(a) * b; } __forceinline vint4 operator &(const vint4& a, const vint4& b) { return _mm_and_si128(a, b); } __forceinline vint4 operator &(const vint4& a, int b) { return a & vint4(b); } __forceinline vint4 operator &(int a, const vint4& b) { return vint4(a) & b; } __forceinline vint4 operator |(const vint4& a, const vint4& b) { return _mm_or_si128(a, b); } __forceinline vint4 operator |(const vint4& a, int b) { return a | vint4(b); } __forceinline vint4 operator |(int a, const vint4& b) { return vint4(a) | b; } __forceinline vint4 operator ^(const vint4& a, const vint4& b) { return _mm_xor_si128(a, b); } __forceinline vint4 operator ^(const vint4& a, int b) { return a ^ vint4(b); } __forceinline vint4 operator ^(int a, const vint4& b) { return vint4(a) ^ b; } __forceinline vint4 operator <<(const vint4& a, const int n) { return _mm_slli_epi32(a, n); } __forceinline vint4 operator >>(const vint4& 
a, const int n) { return _mm_srai_epi32(a, n); } __forceinline vint4 sll (const vint4& a, int b) { return _mm_slli_epi32(a, b); } __forceinline vint4 sra (const vint4& a, int b) { return _mm_srai_epi32(a, b); } __forceinline vint4 srl (const vint4& a, int b) { return _mm_srli_epi32(a, b); } //////////////////////////////////////////////////////////////////////////////// /// Assignment Operators //////////////////////////////////////////////////////////////////////////////// __forceinline vint4& operator +=(vint4& a, const vint4& b) { return a = a + b; } __forceinline vint4& operator +=(vint4& a, int b) { return a = a + b; } __forceinline vint4& operator -=(vint4& a, const vint4& b) { return a = a - b; } __forceinline vint4& operator -=(vint4& a, int b) { return a = a - b; } #if defined(__SSE4_1__) __forceinline vint4& operator *=(vint4& a, const vint4& b) { return a = a * b; } __forceinline vint4& operator *=(vint4& a, int b) { return a = a * b; } #endif __forceinline vint4& operator &=(vint4& a, const vint4& b) { return a = a & b; } __forceinline vint4& operator &=(vint4& a, int b) { return a = a & b; } __forceinline vint4& operator |=(vint4& a, const vint4& b) { return a = a | b; } __forceinline vint4& operator |=(vint4& a, int b) { return a = a | b; } __forceinline vint4& operator <<=(vint4& a, int b) { return a = a << b; } __forceinline vint4& operator >>=(vint4& a, int b) { return a = a >> b; } //////////////////////////////////////////////////////////////////////////////// /// Comparison Operators + Select //////////////////////////////////////////////////////////////////////////////// __forceinline vboolf4 operator ==(const vint4& a, const vint4& b) { return _mm_castsi128_ps(_mm_cmpeq_epi32(a, b)); } __forceinline vboolf4 operator !=(const vint4& a, const vint4& b) { return !(a == b); } __forceinline vboolf4 operator < (const vint4& a, const vint4& b) { return _mm_castsi128_ps(_mm_cmplt_epi32(a, b)); } __forceinline vboolf4 operator >=(const vint4& a, const 
vint4& b) { return !(a < b); } __forceinline vboolf4 operator > (const vint4& a, const vint4& b) { return _mm_castsi128_ps(_mm_cmpgt_epi32(a, b)); } __forceinline vboolf4 operator <=(const vint4& a, const vint4& b) { return !(a > b); } __forceinline vboolf4 operator ==(const vint4& a, int b) { return a == vint4(b); } __forceinline vboolf4 operator ==(int a, const vint4& b) { return vint4(a) == b; } __forceinline vboolf4 operator !=(const vint4& a, int b) { return a != vint4(b); } __forceinline vboolf4 operator !=(int a, const vint4& b) { return vint4(a) != b; } __forceinline vboolf4 operator < (const vint4& a, int b) { return a < vint4(b); } __forceinline vboolf4 operator < (int a, const vint4& b) { return vint4(a) < b; } __forceinline vboolf4 operator >=(const vint4& a, int b) { return a >= vint4(b); } __forceinline vboolf4 operator >=(int a, const vint4& b) { return vint4(a) >= b; } __forceinline vboolf4 operator > (const vint4& a, int b) { return a > vint4(b); } __forceinline vboolf4 operator > (int a, const vint4& b) { return vint4(a) > b; } __forceinline vboolf4 operator <=(const vint4& a, int b) { return a <= vint4(b); } __forceinline vboolf4 operator <=(int a, const vint4& b) { return vint4(a) <= b; } __forceinline vboolf4 eq(const vint4& a, const vint4& b) { return a == b; } __forceinline vboolf4 ne(const vint4& a, const vint4& b) { return a != b; } __forceinline vboolf4 lt(const vint4& a, const vint4& b) { return a < b; } __forceinline vboolf4 ge(const vint4& a, const vint4& b) { return a >= b; } __forceinline vboolf4 gt(const vint4& a, const vint4& b) { return a > b; } __forceinline vboolf4 le(const vint4& a, const vint4& b) { return a <= b; } __forceinline vboolf4 eq(const vboolf4& mask, const vint4& a, const vint4& b) { return mask & (a == b); } __forceinline vboolf4 ne(const vboolf4& mask, const vint4& a, const vint4& b) { return mask & (a != b); } __forceinline vboolf4 lt(const vboolf4& mask, const vint4& a, const vint4& b) { return mask & (a < b); } 
__forceinline vboolf4 ge(const vboolf4& mask, const vint4& a, const vint4& b) { return mask & (a >= b); } __forceinline vboolf4 gt(const vboolf4& mask, const vint4& a, const vint4& b) { return mask & (a > b); } __forceinline vboolf4 le(const vboolf4& mask, const vint4& a, const vint4& b) { return mask & (a <= b); } template __forceinline vint4 select(const vint4& t, const vint4& f) { #if defined(__SSE4_1__) return _mm_castps_si128(_mm_blend_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(t), mask)); #else return select(vboolf4(mask), t, f); #endif } #if defined(__SSE4_1__) __forceinline vint4 min(const vint4& a, const vint4& b) { return _mm_min_epi32(a, b); } __forceinline vint4 max(const vint4& a, const vint4& b) { return _mm_max_epi32(a, b); } __forceinline vint4 umin(const vint4& a, const vint4& b) { return _mm_min_epu32(a, b); } __forceinline vint4 umax(const vint4& a, const vint4& b) { return _mm_max_epu32(a, b); } #else __forceinline vint4 min(const vint4& a, const vint4& b) { return select(a < b,a,b); } __forceinline vint4 max(const vint4& a, const vint4& b) { return select(a < b,b,a); } #endif __forceinline vint4 min(const vint4& a, int b) { return min(a,vint4(b)); } __forceinline vint4 min(int a, const vint4& b) { return min(vint4(a),b); } __forceinline vint4 max(const vint4& a, int b) { return max(a,vint4(b)); } __forceinline vint4 max(int a, const vint4& b) { return max(vint4(a),b); } //////////////////////////////////////////////////////////////////////////////// // Movement/Shifting/Shuffling Functions //////////////////////////////////////////////////////////////////////////////// __forceinline vint4 unpacklo(const vint4& a, const vint4& b) { return _mm_castps_si128(_mm_unpacklo_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b))); } __forceinline vint4 unpackhi(const vint4& a, const vint4& b) { return _mm_castps_si128(_mm_unpackhi_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b))); } template __forceinline vint4 shuffle(const vint4& v) { return _mm_shuffle_epi32(v, 
_MM_SHUFFLE(i3, i2, i1, i0)); } template __forceinline vint4 shuffle(const vint4& a, const vint4& b) { return _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _MM_SHUFFLE(i3, i2, i1, i0))); } #if defined(__SSE3__) template<> __forceinline vint4 shuffle<0, 0, 2, 2>(const vint4& v) { return _mm_castps_si128(_mm_moveldup_ps(_mm_castsi128_ps(v))); } template<> __forceinline vint4 shuffle<1, 1, 3, 3>(const vint4& v) { return _mm_castps_si128(_mm_movehdup_ps(_mm_castsi128_ps(v))); } template<> __forceinline vint4 shuffle<0, 1, 0, 1>(const vint4& v) { return _mm_castpd_si128(_mm_movedup_pd (_mm_castsi128_pd(v))); } #endif template __forceinline vint4 shuffle(const vint4& v) { return shuffle(v); } #if defined(__SSE4_1__) template __forceinline int extract(const vint4& b) { return _mm_extract_epi32(b, src); } template __forceinline vint4 insert(const vint4& a, const int b) { return _mm_insert_epi32(a, b, dst); } #else template __forceinline int extract(const vint4& b) { return b[src&3]; } template __forceinline vint4 insert(const vint4& a, int b) { vint4 c = a; c[dst&3] = b; return c; } #endif template<> __forceinline int extract<0>(const vint4& b) { return _mm_cvtsi128_si32(b); } __forceinline int toScalar(const vint4& v) { return _mm_cvtsi128_si32(v); } //////////////////////////////////////////////////////////////////////////////// /// Reductions //////////////////////////////////////////////////////////////////////////////// #if defined(__SSE4_1__) __forceinline vint4 vreduce_min(const vint4& v) { vint4 h = min(shuffle<1,0,3,2>(v),v); return min(shuffle<2,3,0,1>(h),h); } __forceinline vint4 vreduce_max(const vint4& v) { vint4 h = max(shuffle<1,0,3,2>(v),v); return max(shuffle<2,3,0,1>(h),h); } __forceinline vint4 vreduce_add(const vint4& v) { vint4 h = shuffle<1,0,3,2>(v) + v ; return shuffle<2,3,0,1>(h) + h ; } __forceinline int reduce_min(const vint4& v) { return toScalar(vreduce_min(v)); } __forceinline int reduce_max(const vint4& v) { return 
toScalar(vreduce_max(v)); } __forceinline int reduce_add(const vint4& v) { return toScalar(vreduce_add(v)); } __forceinline size_t select_min(const vint4& v) { return bsf(movemask(v == vreduce_min(v))); } __forceinline size_t select_max(const vint4& v) { return bsf(movemask(v == vreduce_max(v))); } __forceinline size_t select_min(const vboolf4& valid, const vint4& v) { const vint4 a = select(valid,v,vint4(pos_inf)); return bsf(movemask(valid & (a == vreduce_min(a)))); } __forceinline size_t select_max(const vboolf4& valid, const vint4& v) { const vint4 a = select(valid,v,vint4(neg_inf)); return bsf(movemask(valid & (a == vreduce_max(a)))); } #else __forceinline int reduce_min(const vint4& v) { return min(v[0],v[1],v[2],v[3]); } __forceinline int reduce_max(const vint4& v) { return max(v[0],v[1],v[2],v[3]); } __forceinline int reduce_add(const vint4& v) { return v[0]+v[1]+v[2]+v[3]; } #endif //////////////////////////////////////////////////////////////////////////////// /// Output Operators //////////////////////////////////////////////////////////////////////////////// __forceinline embree_ostream operator <<(embree_ostream cout, const vint4& a) { return cout << "<" << a[0] << ", " << a[1] << ", " << a[2] << ", " << a[3] << ">"; } } #undef vboolf #undef vboold #undef vint #undef vuint #undef vllong #undef vfloat #undef vdouble level-zero-raytracing-support-1.2.3/rtbuild/simd/vuint4_sse2.h000066400000000000000000000353151514453371700244340ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "../math/emath.h" #define vboolf vboolf_impl #define vboold vboold_impl #define vint vint_impl #define vuint vuint_impl #define vllong vllong_impl #define vfloat vfloat_impl #define vdouble vdouble_impl namespace embree { /* 4-wide SSE integer type */ template<> struct vuint<4> { ALIGNED_STRUCT_(16); typedef vboolf4 Bool; typedef vuint4 Int; typedef vfloat4 Float; enum { size = 4 }; // number of SIMD 
elements union { __m128i v; unsigned int i[4]; }; // data //////////////////////////////////////////////////////////////////////////////// /// Constructors, Assignment & Cast Operators //////////////////////////////////////////////////////////////////////////////// __forceinline vuint() {} __forceinline vuint(const vuint4& a) { v = a.v; } __forceinline vuint4& operator =(const vuint4& a) { v = a.v; return *this; } __forceinline vuint(const __m128i a) : v(a) {} __forceinline operator const __m128i&() const { return v; } __forceinline operator __m128i&() { return v; } __forceinline vuint(unsigned int a) : v(_mm_set1_epi32(a)) {} __forceinline vuint(unsigned int a, unsigned int b, unsigned int c, unsigned int d) : v(_mm_set_epi32(d, c, b, a)) {} __forceinline explicit vuint(const vboolf4& a) : v(_mm_castps_si128((__m128)a)) {} //////////////////////////////////////////////////////////////////////////////// /// Constants //////////////////////////////////////////////////////////////////////////////// __forceinline vuint(ZeroTy) : v(_mm_setzero_si128()) {} __forceinline vuint(OneTy) : v(_mm_set1_epi32(1)) {} __forceinline vuint(PosInfTy) : v(_mm_set1_epi32(unsigned(pos_inf))) {} __forceinline vuint(StepTy) : v(_mm_set_epi32(3, 2, 1, 0)) {} __forceinline vuint(TrueTy) { v = _mm_cmpeq_epi32(v,v); } __forceinline vuint(UndefinedTy) : v(_mm_castps_si128(_mm_undefined_ps())) {} //////////////////////////////////////////////////////////////////////////////// /// Loads and Stores //////////////////////////////////////////////////////////////////////////////// static __forceinline vuint4 load (const void* a) { return _mm_load_si128((__m128i*)a); } static __forceinline vuint4 loadu(const void* a) { return _mm_loadu_si128((__m128i*)a); } static __forceinline void store (void* ptr, const vuint4& v) { _mm_store_si128((__m128i*)ptr,v); } static __forceinline void storeu(void* ptr, const vuint4& v) { _mm_storeu_si128((__m128i*)ptr,v); } static __forceinline vuint4 load (const vbool4& 
mask, const void* a) { return _mm_and_si128(_mm_load_si128 ((__m128i*)a),mask); } static __forceinline vuint4 loadu(const vbool4& mask, const void* a) { return _mm_and_si128(_mm_loadu_si128((__m128i*)a),mask); } static __forceinline void store (const vboolf4& mask, void* ptr, const vuint4& i) { store (ptr,select(mask,i,load (ptr))); } static __forceinline void storeu(const vboolf4& mask, void* ptr, const vuint4& i) { storeu(ptr,select(mask,i,loadu(ptr))); } //////////////////////////////////////////////////////////////////////////////// /// Array Access //////////////////////////////////////////////////////////////////////////////// __forceinline const unsigned int& operator [](size_t index) const { assert(index < 4); return i[index]; } __forceinline unsigned int& operator [](size_t index) { assert(index < 4); return i[index]; } friend __forceinline vuint4 select(const vboolf4& m, const vuint4& t, const vuint4& f) { #if defined(__SSE4_1__) return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(t), m)); #else return _mm_or_si128(_mm_and_si128(m, t), _mm_andnot_si128(m, f)); #endif } }; //////////////////////////////////////////////////////////////////////////////// /// Unary Operators //////////////////////////////////////////////////////////////////////////////// __forceinline vboolf4 asBool(const vuint4& a) { return _mm_castsi128_ps(a); } __forceinline vuint4 operator +(const vuint4& a) { return a; } __forceinline vuint4 operator -(const vuint4& a) { return _mm_sub_epi32(_mm_setzero_si128(), a); } //////////////////////////////////////////////////////////////////////////////// /// Binary Operators //////////////////////////////////////////////////////////////////////////////// __forceinline vuint4 operator +(const vuint4& a, const vuint4& b) { return _mm_add_epi32(a, b); } __forceinline vuint4 operator +(const vuint4& a, unsigned int b) { return a + vuint4(b); } __forceinline vuint4 operator +(unsigned int a, const vuint4& b) { return 
vuint4(a) + b; } __forceinline vuint4 operator -(const vuint4& a, const vuint4& b) { return _mm_sub_epi32(a, b); } __forceinline vuint4 operator -(const vuint4& a, unsigned int b) { return a - vuint4(b); } __forceinline vuint4 operator -(unsigned int a, const vuint4& b) { return vuint4(a) - b; } //#if defined(__SSE4_1__) // __forceinline vuint4 operator *(const vuint4& a, const vuint4& b) { return _mm_mullo_epu32(a, b); } //#else // __forceinline vuint4 operator *(const vuint4& a, const vuint4& b) { return vuint4(a[0]*b[0],a[1]*b[1],a[2]*b[2],a[3]*b[3]); } //#endif // __forceinline vuint4 operator *(const vuint4& a, unsigned int b) { return a * vuint4(b); } // __forceinline vuint4 operator *(unsigned int a, const vuint4& b) { return vuint4(a) * b; } __forceinline vuint4 operator &(const vuint4& a, const vuint4& b) { return _mm_and_si128(a, b); } __forceinline vuint4 operator &(const vuint4& a, unsigned int b) { return a & vuint4(b); } __forceinline vuint4 operator &(unsigned int a, const vuint4& b) { return vuint4(a) & b; } __forceinline vuint4 operator |(const vuint4& a, const vuint4& b) { return _mm_or_si128(a, b); } __forceinline vuint4 operator |(const vuint4& a, unsigned int b) { return a | vuint4(b); } __forceinline vuint4 operator |(unsigned int a, const vuint4& b) { return vuint4(a) | b; } __forceinline vuint4 operator ^(const vuint4& a, const vuint4& b) { return _mm_xor_si128(a, b); } __forceinline vuint4 operator ^(const vuint4& a, unsigned int b) { return a ^ vuint4(b); } __forceinline vuint4 operator ^(unsigned int a, const vuint4& b) { return vuint4(a) ^ b; } __forceinline vuint4 operator <<(const vuint4& a, unsigned int n) { return _mm_slli_epi32(a, n); } __forceinline vuint4 operator >>(const vuint4& a, unsigned int n) { return _mm_srli_epi32(a, n); } __forceinline vuint4 sll (const vuint4& a, unsigned int b) { return _mm_slli_epi32(a, b); } __forceinline vuint4 sra (const vuint4& a, unsigned int b) { return _mm_srai_epi32(a, b); } __forceinline 
vuint4 srl (const vuint4& a, unsigned int b) { return _mm_srli_epi32(a, b); } //////////////////////////////////////////////////////////////////////////////// /// Assignment Operators //////////////////////////////////////////////////////////////////////////////// __forceinline vuint4& operator +=(vuint4& a, const vuint4& b) { return a = a + b; } __forceinline vuint4& operator +=(vuint4& a, unsigned int b) { return a = a + b; } __forceinline vuint4& operator -=(vuint4& a, const vuint4& b) { return a = a - b; } __forceinline vuint4& operator -=(vuint4& a, unsigned int b) { return a = a - b; } //#if defined(__SSE4_1__) // __forceinline vuint4& operator *=(vuint4& a, const vuint4& b) { return a = a * b; } // __forceinline vuint4& operator *=(vuint4& a, unsigned int b) { return a = a * b; } //#endif __forceinline vuint4& operator &=(vuint4& a, const vuint4& b) { return a = a & b; } __forceinline vuint4& operator &=(vuint4& a, unsigned int b) { return a = a & b; } __forceinline vuint4& operator |=(vuint4& a, const vuint4& b) { return a = a | b; } __forceinline vuint4& operator |=(vuint4& a, unsigned int b) { return a = a | b; } __forceinline vuint4& operator <<=(vuint4& a, unsigned int b) { return a = a << b; } __forceinline vuint4& operator >>=(vuint4& a, unsigned int b) { return a = a >> b; } //////////////////////////////////////////////////////////////////////////////// /// Comparison Operators + Select //////////////////////////////////////////////////////////////////////////////// __forceinline vboolf4 operator ==(const vuint4& a, const vuint4& b) { return _mm_castsi128_ps(_mm_cmpeq_epi32(a, b)); } __forceinline vboolf4 operator !=(const vuint4& a, const vuint4& b) { return !(a == b); } //__forceinline vboolf4 operator < (const vuint4& a, const vuint4& b) { return _mm_castsi128_ps(_mm_cmplt_epu32(a, b)); } //__forceinline vboolf4 operator >=(const vuint4& a, const vuint4& b) { return !(a < b); } //__forceinline vboolf4 operator > (const vuint4& a, const vuint4& b) 
{ return _mm_castsi128_ps(_mm_cmpgt_epu32(a, b)); } //__forceinline vboolf4 operator <=(const vuint4& a, const vuint4& b) { return !(a > b); } __forceinline vboolf4 operator ==(const vuint4& a, unsigned int b) { return a == vuint4(b); } __forceinline vboolf4 operator ==(unsigned int a, const vuint4& b) { return vuint4(a) == b; } __forceinline vboolf4 operator !=(const vuint4& a, unsigned int b) { return a != vuint4(b); } __forceinline vboolf4 operator !=(unsigned int a, const vuint4& b) { return vuint4(a) != b; } //__forceinline vboolf4 operator < (const vuint4& a, unsigned int b) { return a < vuint4(b); } //__forceinline vboolf4 operator < (unsigned int a, const vuint4& b) { return vuint4(a) < b; } //__forceinline vboolf4 operator >=(const vuint4& a, unsigned int b) { return a >= vuint4(b); } //__forceinline vboolf4 operator >=(unsigned int a, const vuint4& b) { return vuint4(a) >= b; } //__forceinline vboolf4 operator > (const vuint4& a, unsigned int b) { return a > vuint4(b); } //__forceinline vboolf4 operator > (unsigned int a, const vuint4& b) { return vuint4(a) > b; } //__forceinline vboolf4 operator <=(const vuint4& a, unsigned int b) { return a <= vuint4(b); } //__forceinline vboolf4 operator <=(unsigned int a, const vuint4& b) { return vuint4(a) <= b; } __forceinline vboolf4 eq(const vuint4& a, const vuint4& b) { return a == b; } __forceinline vboolf4 ne(const vuint4& a, const vuint4& b) { return a != b; } //__forceinline vboolf4 lt(const vuint4& a, const vuint4& b) { return a < b; } //__forceinline vboolf4 ge(const vuint4& a, const vuint4& b) { return a >= b; } //__forceinline vboolf4 gt(const vuint4& a, const vuint4& b) { return a > b; } //__forceinline vboolf4 le(const vuint4& a, const vuint4& b) { return a <= b; } __forceinline vboolf4 eq(const vboolf4& mask, const vuint4& a, const vuint4& b) { return mask & (a == b); } __forceinline vboolf4 ne(const vboolf4& mask, const vuint4& a, const vuint4& b) { return mask & (a != b); } //__forceinline vboolf4 
lt(const vboolf4& mask, const vuint4& a, const vuint4& b) { return mask & (a < b); } //__forceinline vboolf4 ge(const vboolf4& mask, const vuint4& a, const vuint4& b) { return mask & (a >= b); } //__forceinline vboolf4 gt(const vboolf4& mask, const vuint4& a, const vuint4& b) { return mask & (a > b); } //__forceinline vboolf4 le(const vboolf4& mask, const vuint4& a, const vuint4& b) { return mask & (a <= b); } template __forceinline vuint4 select(const vuint4& t, const vuint4& f) { #if defined(__SSE4_1__) return _mm_castps_si128(_mm_blend_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(t), mask)); #else return select(vboolf4(mask), t, f); #endif } /*#if defined(__SSE4_1__) __forceinline vuint4 min(const vuint4& a, const vuint4& b) { return _mm_min_epu32(a, b); } __forceinline vuint4 max(const vuint4& a, const vuint4& b) { return _mm_max_epu32(a, b); } #else __forceinline vuint4 min(const vuint4& a, const vuint4& b) { return select(a < b,a,b); } __forceinline vuint4 max(const vuint4& a, const vuint4& b) { return select(a < b,b,a); } #endif __forceinline vuint4 min(const vuint4& a, unsigned int b) { return min(a,vuint4(b)); } __forceinline vuint4 min(unsigned int a, const vuint4& b) { return min(vuint4(a),b); } __forceinline vuint4 max(const vuint4& a, unsigned int b) { return max(a,vuint4(b)); } __forceinline vuint4 max(unsigned int a, const vuint4& b) { return max(vuint4(a),b); }*/ //////////////////////////////////////////////////////////////////////////////// // Movement/Shifting/Shuffling Functions //////////////////////////////////////////////////////////////////////////////// __forceinline vuint4 unpacklo(const vuint4& a, const vuint4& b) { return _mm_castps_si128(_mm_unpacklo_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b))); } __forceinline vuint4 unpackhi(const vuint4& a, const vuint4& b) { return _mm_castps_si128(_mm_unpackhi_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b))); } template __forceinline vuint4 shuffle(const vuint4& v) { return _mm_shuffle_epi32(v, 
_MM_SHUFFLE(i3, i2, i1, i0)); } template __forceinline vuint4 shuffle(const vuint4& a, const vuint4& b) { return _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _MM_SHUFFLE(i3, i2, i1, i0))); } #if defined(__SSE3__) template<> __forceinline vuint4 shuffle<0, 0, 2, 2>(const vuint4& v) { return _mm_castps_si128(_mm_moveldup_ps(_mm_castsi128_ps(v))); } template<> __forceinline vuint4 shuffle<1, 1, 3, 3>(const vuint4& v) { return _mm_castps_si128(_mm_movehdup_ps(_mm_castsi128_ps(v))); } template<> __forceinline vuint4 shuffle<0, 1, 0, 1>(const vuint4& v) { return _mm_castpd_si128(_mm_movedup_pd (_mm_castsi128_pd(v))); } #endif template __forceinline vuint4 shuffle(const vuint4& v) { return shuffle(v); } #if defined(__SSE4_1__) template __forceinline unsigned int extract(const vuint4& b) { return _mm_extract_epi32(b, src); } template __forceinline vuint4 insert(const vuint4& a, const unsigned b) { return _mm_insert_epi32(a, b, dst); } #else template __forceinline unsigned int extract(const vuint4& b) { return b[src&3]; } template __forceinline vuint4 insert(const vuint4& a, const unsigned b) { vuint4 c = a; c[dst&3] = b; return c; } #endif template<> __forceinline unsigned int extract<0>(const vuint4& b) { return _mm_cvtsi128_si32(b); } __forceinline unsigned int toScalar(const vuint4& v) { return _mm_cvtsi128_si32(v); } //////////////////////////////////////////////////////////////////////////////// /// Output Operators //////////////////////////////////////////////////////////////////////////////// __forceinline embree_ostream operator <<(embree_ostream cout, const vuint4& a) { return cout << "<" << a[0] << ", " << a[1] << ", " << a[2] << ", " << a[3] << ">"; } } #undef vboolf #undef vboold #undef vint #undef vuint #undef vllong #undef vfloat #undef vdouble level-zero-raytracing-support-1.2.3/rtbuild/statistics.cpp000066400000000000000000000200051514453371700240260ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // 
SPDX-License-Identifier: Apache-2.0 #include "statistics.h" namespace embree { class RestoreStreamState { public: RestoreStreamState(std::ostream& iostream) : iostream(iostream), flags(iostream.flags()), precision(iostream.precision()) { } ~RestoreStreamState() { iostream.flags(flags); iostream.precision(precision); } private: std::ostream& iostream; std::ios::fmtflags flags; std::streamsize precision; }; double ratio(double a, double b) { if (b == 0.0) return 0.0f; else return a/b; } double percent(double a, double b) { return 100.0*ratio(a,b); } double ratio(size_t a, size_t b) { return ratio(double(a), double(b)); } double percent(size_t a, size_t b) { return percent(double(a), double(b)); } void BVHStatistics::NodeStat::print(std::ostream& cout, double totalSAH, size_t totalBytes, size_t numPrimitives) const { RestoreStreamState iostate(cout); cout << std::setw(7) << numNodes << " "; cout << std::setw(7) << std::setprecision(3) << sah(); cout << std::setw(7) << std::setprecision(2) << percent(sah(),totalSAH) << "% "; cout << std::setw(8) << std::setprecision(2) << bytes()/1E6 << " MB "; cout << std::setw(7) << std::setprecision(2) << percent(numBytes,numBytes) << "% "; cout << std::setw(7) << std::setprecision(2) << percent(bytes(),totalBytes) << "% "; cout << std::setw(8) << std::setprecision(2) << ratio(bytes(),numNodes) << " "; cout << std::setw(8) << std::setprecision(2) << ratio(bytes(),numChildrenUsed) << " "; cout << std::setw(8) << std::setprecision(2) << ratio(bytes(),numPrimitives) << " "; cout << std::setw(7) << std::setprecision(2) << ratio(numChildrenUsed,numNodes) << " "; cout << std::setw(7) << std::setprecision(2) << 100.0*fillRate() << "% "; cout << std::endl; } void BVHStatistics::LeafStat::print(std::ostream& cout, double totalSAH, size_t totalBytes, size_t numPrimitives, bool blocks) const { RestoreStreamState iostate(cout); size_t N = blocks ? 
numBlocks : numLeaves; cout << std::setw(7) << N << " "; cout << std::setw(7) << std::setprecision(3) << sah(); cout << std::setw(7) << std::setprecision(2) << percent(sah(),totalSAH) << "% "; cout << std::setw(8) << std::setprecision(2) << double(bytes())/1E6 << " MB "; cout << std::setw(7) << std::setprecision(2) << percent(numBytesUsed,numBytesTotal) << "% "; cout << std::setw(7) << std::setprecision(2) << percent(bytes(),totalBytes) << "% "; cout << std::setw(8) << std::setprecision(2) << ratio(bytes(),N) << " "; cout << std::setw(8) << std::setprecision(2) << ratio(bytes(),numPrimsUsed) << " "; cout << std::setw(8) << std::setprecision(2) << ratio(bytes(),numPrimitives) << " "; cout << std::setw(7) << std::setprecision(2) << ratio(numPrimsUsed,N) << " "; cout << std::setw(7) << std::setprecision(2) << 100.0*fillRate() << "% "; cout << std::endl; } void BVHStatistics::print (std::ostream& cout) const { RestoreStreamState iostate(cout); cout.setf(std::ios::fixed, std::ios::floatfield); cout.fill(' '); double totalSAH = internalNode.nodeSAH + quadLeaf.leafSAH + proceduralLeaf.leafSAH + instanceLeaf.leafSAH; size_t totalBytes = internalNode.bytes() + quadLeaf.bytes() + proceduralLeaf.bytes() + instanceLeaf.bytes(); size_t totalNodes = internalNode.numNodes + quadLeaf.numLeaves + proceduralLeaf.numLeaves + instanceLeaf.numLeaves; size_t totalPrimitives = quadLeaf.numPrimsUsed + proceduralLeaf.numPrimsUsed + instanceLeaf.numPrimsUsed; cout << std::endl; cout << "BVH statistics:" << std::endl; cout << "---------------" << std::endl; cout << " numScenePrimitives = " << numScenePrimitives << std::endl; cout << " numBuildPrimitives = " << numBuildPrimitives << std::endl; cout << " numBuildPrimitivesPostSplit = " << numBuildPrimitivesPostSplit << std::endl; cout << " primRefSplits = " << std::setprecision(2) << percent(numBuildPrimitivesPostSplit,numBuildPrimitives) << "%" << std::endl; cout << " numBVHPrimitives = " << totalPrimitives << std::endl; cout << " 
spatialSplits = " << std::setprecision(2) << percent(totalPrimitives,numScenePrimitives) << "%" << std::endl; cout << std::endl; cout << " #nodes SAH total bytes used total b/node b/child b/prim #child fill" << std::endl; cout << "----------------------------------------------------------------------------------------------------------------------" << std::endl; cout << " total : "; cout << std::setw(7) << totalNodes << " "; cout << std::setw(7) << std::setprecision(3) << totalSAH; cout << " 100.00% "; cout << std::setw(8) << std::setprecision(2) << totalBytes/1E6 << " MB "; cout << " 100.00% "; cout << " 100.00% "; cout << " "; cout << " "; cout << std::setw(8) << std::setprecision(2) << ratio(totalBytes,totalPrimitives) << std::endl; LeafStat leaf = quadLeaf + proceduralLeaf + instanceLeaf; cout << " internalNode : "; internalNode .print(cout,totalSAH,totalBytes,totalPrimitives); cout << " leaves : "; leaf .print(cout,totalSAH,totalBytes,totalPrimitives); cout << " quadLeaf : "; quadLeaf .print(cout,totalSAH,totalBytes,totalPrimitives); cout << " proceduralLeaf : "; proceduralLeaf.print(cout,totalSAH,totalBytes,totalPrimitives); cout << " proceduralBlock: "; proceduralLeaf.print(cout,totalSAH,totalBytes,totalPrimitives,true); cout << " instanceLeaf : "; instanceLeaf .print(cout,totalSAH,totalBytes,totalPrimitives); } void BVHStatistics::print_raw(std::ostream& cout) const { RestoreStreamState iostate(cout); size_t totalPrimitives = quadLeaf.numPrimsUsed + proceduralLeaf.numPrimsUsed + instanceLeaf.numPrimsUsed; cout << "bvh_spatial_split_factor = " << percent(totalPrimitives,numBuildPrimitives) << std::endl; cout << "bvh_internal_sah = " << internalNode.nodeSAH << std::endl; cout << "bvh_internal_num = " << internalNode.numNodes << std::endl; cout << "bvh_internal_num_children_used = " << internalNode.numChildrenUsed << std::endl; cout << "bvh_internal_num_children_total = " << internalNode.numChildrenTotal << std::endl; cout << "bvh_internal_num_bytes = " << 
internalNode.bytes() << std::endl; cout << "bvh_quad_leaf_sah = " << quadLeaf.leafSAH << std::endl; cout << "bvh_quad_leaf_num = " << quadLeaf.numLeaves << std::endl; cout << "bvh_quad_leaf_num_prims_used = " << quadLeaf.numPrimsUsed << std::endl; cout << "bvh_quad_leaf_num_prims_total = " << quadLeaf.numPrimsTotal << std::endl; cout << "bvh_quad_leaf_num_bytes_used = " << quadLeaf.numBytesUsed << std::endl; cout << "bvh_quad_leaf_num_bytes_total = " << quadLeaf.numBytesTotal << std::endl; cout << "bvh_procedural_leaf_sah = " << proceduralLeaf.leafSAH << std::endl; cout << "bvh_procedural_leaf_num = " << proceduralLeaf.numLeaves << std::endl; cout << "bvh_procedural_leaf_num_prims_used = " << proceduralLeaf.numPrimsUsed << std::endl; cout << "bvh_procedural_leaf_num_prims_total = " << proceduralLeaf.numPrimsTotal << std::endl; cout << "bvh_procedural_leaf_num_bytes_used = " << proceduralLeaf.numBytesUsed << std::endl; cout << "bvh_procedural_leaf_num_bytes_total = " << proceduralLeaf.numBytesTotal << std::endl; cout << "bvh_instance_leaf_sah = " << instanceLeaf.leafSAH << std::endl; cout << "bvh_instance_leaf_num = " << instanceLeaf.numLeaves << std::endl; cout << "bvh_instance_leaf_num_prims_used = " << instanceLeaf.numPrimsUsed << std::endl; cout << "bvh_instance_leaf_num_prims_total = " << instanceLeaf.numPrimsTotal << std::endl; cout << "bvh_instance_leaf_num_bytes_used = " << instanceLeaf.numBytesUsed << std::endl; cout << "bvh_instance_leaf_num_bytes_total = " << instanceLeaf.numBytesTotal << std::endl; } } level-zero-raytracing-support-1.2.3/rtbuild/statistics.h000066400000000000000000000100341514453371700234740ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #if defined(ZE_RAYTRACING) #include "sys/platform.h" #else #include "../../../common/sys/platform.h" #endif namespace embree { struct BVHStatistics { struct NodeStat { NodeStat ( double nodeSAH = 0, size_t numNodes = 0, size_t 
numChildrenUsed = 0, size_t numChildrenTotal = 0, size_t numBytes = 0) : nodeSAH(nodeSAH), numNodes(numNodes), numChildrenUsed(numChildrenUsed), numChildrenTotal(numChildrenTotal), numBytes(numBytes) {} double sah() const { return nodeSAH; } size_t bytes() const { return numBytes; } size_t size() const { return numNodes; } double fillRateNom () const { return double(numChildrenUsed); } double fillRateDen () const { return double(numChildrenTotal); } double fillRate () const { return fillRateDen() ? fillRateNom()/fillRateDen() : 0.0; } friend NodeStat operator+ ( const NodeStat& a, const NodeStat& b) { return NodeStat(a.nodeSAH + b.nodeSAH, a.numNodes+b.numNodes, a.numChildrenUsed+b.numChildrenUsed, a.numChildrenTotal+b.numChildrenTotal, a.numBytes+b.numBytes); } void print(std::ostream& cout, double totalSAH, size_t totalBytes, size_t numPrimitives) const; public: double nodeSAH; size_t numNodes; size_t numChildrenUsed; size_t numChildrenTotal; size_t numBytes; }; struct LeafStat { LeafStat(double leafSAH = 0.0f, size_t numLeaves = 0, size_t numBlocks = 0, size_t numPrimsUsed = 0, size_t numPrimsTotal = 0, size_t numBytesUsed = 0, size_t numBytesTotal = 0) : leafSAH(leafSAH), numLeaves(numLeaves), numBlocks(numBlocks), numPrimsUsed(numPrimsUsed), numPrimsTotal(numPrimsTotal), numBytesUsed(numBytesUsed), numBytesTotal(numBytesTotal) {} double sah() const { return leafSAH; } size_t bytes() const { return numBytesTotal; } size_t size() const { return numLeaves; } double fillRateNom () const { return double(numPrimsUsed); } double fillRateDen () const { return double(numPrimsTotal); } double fillRate () const { return fillRateDen() ? 
fillRateNom()/fillRateDen() : 0.0; } friend LeafStat operator+ ( const LeafStat& a, const LeafStat& b) { return LeafStat(a.leafSAH + b.leafSAH, a.numLeaves+b.numLeaves, a.numBlocks+b.numBlocks, a.numPrimsUsed+b.numPrimsUsed, a.numPrimsTotal+b.numPrimsTotal, a.numBytesUsed+b.numBytesUsed, a.numBytesTotal+b.numBytesTotal); } void print(std::ostream& cout, double totalSAH, size_t totalBytes, size_t numPrimitives, bool blocks = false) const; public: double leafSAH; //!< SAH of the leaves only size_t numLeaves; //!< Number of leaf nodes. size_t numBlocks; //!< Number of blocks referenced size_t numPrimsUsed; //!< Number of active primitives size_t numPrimsTotal; //!< Number of active and inactive primitives size_t numBytesUsed; //!< Number of used bytes size_t numBytesTotal; //!< Number of total bytes of leaves. }; BVHStatistics () : numScenePrimitives(0), numBuildPrimitives(0), numBuildPrimitivesPostSplit(0) {} void print (std::ostream& cout) const; void print_raw(std::ostream& cout) const; size_t numScenePrimitives; size_t numBuildPrimitives; size_t numBuildPrimitivesPostSplit; NodeStat internalNode; LeafStat quadLeaf; LeafStat proceduralLeaf; LeafStat instanceLeaf; }; } level-zero-raytracing-support-1.2.3/rtbuild/sys/000077500000000000000000000000001514453371700217515ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/rtbuild/sys/CMakeLists.txt000066400000000000000000000012721514453371700245130ustar00rootroot00000000000000## Copyright 2009-2021 Intel Corporation ## SPDX-License-Identifier: Apache-2.0 SET(CMAKE_THREAD_PREFER_PTHREAD TRUE) FIND_PACKAGE(Threads REQUIRED) ADD_LIBRARY(sys STATIC sysinfo.cpp alloc.cpp ) SET_PROPERTY(TARGET sys PROPERTY FOLDER common) SET_PROPERTY(TARGET sys APPEND PROPERTY COMPILE_FLAGS " ${FLAGS_LOWEST}") TARGET_LINK_LIBRARIES(sys ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS}) IF (EMBREE_SYCL_SUPPORT) TARGET_LINK_LIBRARIES(sys ${SYCL_LIB_NAME}) ENDIF() IF (EMBREE_STATIC_LIB) INSTALL(TARGETS sys EXPORT sys-targets ARCHIVE 
DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT devel) INSTALL(EXPORT sys-targets DESTINATION "${EMBREE_CMAKEEXPORT_DIR}" COMPONENT devel) ENDIF() level-zero-raytracing-support-1.2.3/rtbuild/sys/alloc.cpp000066400000000000000000000206161514453371700235540ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "alloc.h" #include "intrinsics.h" #include "sysinfo.h" //////////////////////////////////////////////////////////////////////////////// /// All Platforms //////////////////////////////////////////////////////////////////////////////// namespace embree { size_t total_allocations = 0; #if defined(EMBREE_SYCL_SUPPORT) __thread sycl::context* tls_context_tutorial = nullptr; __thread sycl::device* tls_device_tutorial = nullptr; __thread sycl::context* tls_context_embree = nullptr; __thread sycl::device* tls_device_embree = nullptr; void enableUSMAllocEmbree(sycl::context* context, sycl::device* device) { if (tls_context_embree != nullptr) throw std::runtime_error("USM allocation already enabled"); if (tls_device_embree != nullptr) throw std::runtime_error("USM allocation already enabled"); tls_context_embree = context; tls_device_embree = device; } void disableUSMAllocEmbree() { if (tls_context_embree == nullptr) throw std::runtime_error("USM allocation not enabled"); if (tls_device_embree == nullptr) throw std::runtime_error("USM allocation not enabled"); tls_context_embree = nullptr; tls_device_embree = nullptr; } void enableUSMAllocTutorial(sycl::context* context, sycl::device* device) { //if (tls_context_tutorial != nullptr) throw std::runtime_error("USM allocation already enabled"); //if (tls_device_tutorial != nullptr) throw std::runtime_error("USM allocation already enabled"); tls_context_tutorial = context; tls_device_tutorial = device; } void disableUSMAllocTutorial() { if (tls_context_tutorial == nullptr) throw std::runtime_error("USM allocation not enabled"); if (tls_device_tutorial == 
nullptr) throw std::runtime_error("USM allocation not enabled"); tls_context_tutorial = nullptr; tls_device_tutorial = nullptr; } #endif void* alignedMalloc(size_t size, size_t align) { if (size == 0) return nullptr; assert((align & (align-1)) == 0); void* ptr = _mm_malloc(size,align); if (size != 0 && ptr == nullptr) throw std::bad_alloc(); return ptr; } void alignedFree(void* ptr) { if (ptr) _mm_free(ptr); } #if defined(EMBREE_SYCL_SUPPORT) void* alignedSYCLMalloc(sycl::context* context, sycl::device* device, size_t size, size_t align, EmbreeUSMMode mode) { assert(context); assert(device); if (size == 0) return nullptr; assert((align & (align-1)) == 0); total_allocations++; void* ptr = nullptr; if (mode == EMBREE_USM_SHARED_DEVICE_READ_ONLY) ptr = sycl::aligned_alloc_shared(align,size,*device,*context,sycl::ext::oneapi::property::usm::device_read_only()); else ptr = sycl::aligned_alloc_shared(align,size,*device,*context); if (size != 0 && ptr == nullptr) throw std::bad_alloc(); return ptr; } void* alignedSYCLMalloc(size_t size, size_t align, EmbreeUSMMode mode) { if (tls_context_tutorial) return alignedSYCLMalloc(tls_context_tutorial, tls_device_tutorial, size, align, mode); if (tls_context_embree ) return alignedSYCLMalloc(tls_context_embree, tls_device_embree, size, align, mode); return nullptr; } void alignedSYCLFree(sycl::context* context, void* ptr) { assert(context); if (ptr) { sycl::free(ptr,*context); } } void alignedSYCLFree(void* ptr) { if (tls_context_tutorial) return alignedSYCLFree(tls_context_tutorial, ptr); if (tls_context_embree ) return alignedSYCLFree(tls_context_embree, ptr); } #endif void* alignedUSMMalloc(size_t size, size_t align, EmbreeUSMMode mode) { #if defined(EMBREE_SYCL_SUPPORT) if (tls_context_embree || tls_context_tutorial) return alignedSYCLMalloc(size,align,mode); else #endif return alignedMalloc(size,align); } void alignedUSMFree(void* ptr) { #if defined(EMBREE_SYCL_SUPPORT) if (tls_context_embree || tls_context_tutorial) return 
alignedSYCLFree(ptr); else #endif return alignedFree(ptr); } static bool huge_pages_enabled = false; __forceinline bool isHugePageCandidate(const size_t bytes) { if (!huge_pages_enabled) return false; /* use huge pages only when memory overhead is low */ const size_t hbytes = (bytes+PAGE_SIZE_2M-1) & ~size_t(PAGE_SIZE_2M-1); return 66*(hbytes-bytes) < bytes; // at most 1.5% overhead } } //////////////////////////////////////////////////////////////////////////////// /// Windows Platform //////////////////////////////////////////////////////////////////////////////// #ifdef _WIN32 #define WIN32_LEAN_AND_MEAN #include #include namespace embree { void* os_malloc(size_t bytes, bool& hugepages) { if (bytes == 0) { hugepages = false; return nullptr; } /* try direct huge page allocation first */ if (isHugePageCandidate(bytes)) { int flags = MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES; char* ptr = (char*) VirtualAlloc(nullptr,bytes,flags,PAGE_READWRITE); if (ptr != nullptr) { hugepages = true; return ptr; } } /* fall back to 4k pages */ int flags = MEM_COMMIT | MEM_RESERVE; char* ptr = (char*) VirtualAlloc(nullptr,bytes,flags,PAGE_READWRITE); if (ptr == nullptr) throw std::bad_alloc(); hugepages = false; return ptr; } size_t os_shrink(void* ptr, size_t bytesNew, size_t bytesOld, bool hugepages) { if (hugepages) // decommitting huge pages seems not to work under Windows return bytesOld; const size_t pageSize = hugepages ? 
PAGE_SIZE_2M : PAGE_SIZE_4K; bytesNew = (bytesNew+pageSize-1) & ~(pageSize-1); bytesOld = (bytesOld+pageSize-1) & ~(pageSize-1); if (bytesNew >= bytesOld) return bytesOld; if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT)) throw std::bad_alloc(); return bytesNew; } void os_free(void* ptr, size_t bytes, bool hugepages) { if (bytes == 0) return; if (!VirtualFree(ptr,0,MEM_RELEASE)) throw std::bad_alloc(); } void os_advise(void *ptr, size_t bytes) { } } #endif //////////////////////////////////////////////////////////////////////////////// /// Unix Platform //////////////////////////////////////////////////////////////////////////////// #if defined(__UNIX__) #include #include #include #include #include #if defined(__MACOSX__) #include #endif namespace embree { void* os_malloc(size_t bytes, bool& hugepages) { if (bytes == 0) { hugepages = false; return nullptr; } /* try direct huge page allocation first */ if (isHugePageCandidate(bytes)) { #if defined(__MACOSX__) void* ptr = mmap(0, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0); if (ptr != MAP_FAILED) { hugepages = true; return ptr; } #elif defined(MAP_HUGETLB) void* ptr = mmap(0, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON | MAP_HUGETLB, -1, 0); if (ptr != MAP_FAILED) { hugepages = true; return ptr; } #endif } /* fallback to 4k pages */ void* ptr = (char*) mmap(0, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); if (ptr == MAP_FAILED) throw std::bad_alloc(); hugepages = false; /* advise huge page hint for THP */ os_advise(ptr,bytes); return ptr; } size_t os_shrink(void* ptr, size_t bytesNew, size_t bytesOld, bool hugepages) { const size_t pageSize = hugepages ? 
PAGE_SIZE_2M : PAGE_SIZE_4K; bytesNew = (bytesNew+pageSize-1) & ~(pageSize-1); bytesOld = (bytesOld+pageSize-1) & ~(pageSize-1); if (bytesNew >= bytesOld) return bytesOld; if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1) throw std::bad_alloc(); return bytesNew; } void os_free(void* ptr, size_t bytes, bool hugepages) { if (bytes == 0) return; /* for hugepages we need to also align the size */ const size_t pageSize = hugepages ? PAGE_SIZE_2M : PAGE_SIZE_4K; bytes = (bytes+pageSize-1) & ~(pageSize-1); if (munmap(ptr,bytes) == -1) throw std::bad_alloc(); } /* hint for transparent huge pages (THP) */ void os_advise(void* pptr, size_t bytes) { #if defined(MADV_HUGEPAGE) madvise(pptr,bytes,MADV_HUGEPAGE); #endif } } #endif level-zero-raytracing-support-1.2.3/rtbuild/sys/alloc.h000066400000000000000000000153641514453371700232250ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "platform.h" #include #include namespace embree { #if defined(EMBREE_SYCL_SUPPORT) /* enables SYCL USM allocation */ void enableUSMAllocEmbree(sycl::context* context, sycl::device* device); void enableUSMAllocTutorial(sycl::context* context, sycl::device* device); /* disables SYCL USM allocation */ void disableUSMAllocEmbree(); void disableUSMAllocTutorial(); #endif #define ALIGNED_STRUCT_(align) \ void* operator new(size_t size) { return alignedMalloc(size,align); } \ void operator delete(void* ptr) { alignedFree(ptr); } \ void* operator new[](size_t size) { return alignedMalloc(size,align); } \ void operator delete[](void* ptr) { alignedFree(ptr); } #define ALIGNED_STRUCT_USM_(align) \ void* operator new(size_t size) { return alignedUSMMalloc(size,align); } \ void operator delete(void* ptr) { alignedUSMFree(ptr); } \ void* operator new[](size_t size) { return alignedUSMMalloc(size,align); } \ void operator delete[](void* ptr) { alignedUSMFree(ptr); } #define ALIGNED_CLASS_(align) \ public: \ 
ALIGNED_STRUCT_(align) \ private: #define ALIGNED_CLASS_USM_(align) \ public: \ ALIGNED_STRUCT_USM_(align) \ private: enum EmbreeUSMMode { EMBREE_USM_SHARED = 0, EMBREE_USM_SHARED_DEVICE_READ_WRITE = 0, EMBREE_USM_SHARED_DEVICE_READ_ONLY = 1 }; /*! aligned allocation */ void* alignedMalloc(size_t size, size_t align); void alignedFree(void* ptr); /*! aligned allocation using SYCL USM */ void* alignedUSMMalloc(size_t size, size_t align = 16, EmbreeUSMMode mode = EMBREE_USM_SHARED_DEVICE_READ_ONLY); void alignedUSMFree(void* ptr); #if defined(EMBREE_SYCL_SUPPORT) /*! aligned allocation using SYCL USM */ void* alignedSYCLMalloc(sycl::context* context, sycl::device* device, size_t size, size_t align, EmbreeUSMMode mode); void alignedSYCLFree(sycl::context* context, void* ptr); // deleter functor to use as deleter in std unique or shared pointers that // capture raw pointers created by sycl::malloc and it's variants template struct sycl_deleter { void operator()(T const* ptr) { alignedUSMFree((void*)ptr); } }; #endif /*! allocator that performs aligned allocations */ template struct aligned_allocator { typedef T value_type; typedef T* pointer; typedef const T* const_pointer; typedef T& reference; typedef const T& const_reference; typedef std::size_t size_type; typedef std::ptrdiff_t difference_type; __forceinline pointer allocate( size_type n ) { return (pointer) alignedMalloc(n*sizeof(value_type),alignment); } __forceinline void deallocate( pointer p, size_type n ) { return alignedFree(p); } __forceinline void construct( pointer p, const_reference val ) { new (p) T(val); } __forceinline void destroy( pointer p ) { p->~T(); } }; /*! 
allocates pages directly from OS */ bool win_enable_selockmemoryprivilege(bool verbose); bool os_init(bool hugepages, bool verbose); void* os_malloc (size_t bytes, bool& hugepages); size_t os_shrink (void* ptr, size_t bytesNew, size_t bytesOld, bool hugepages); void os_free (void* ptr, size_t bytes, bool hugepages); void os_advise (void* ptr, size_t bytes); /*! allocator that performs OS allocations */ template struct os_allocator { typedef T value_type; typedef T* pointer; typedef const T* const_pointer; typedef T& reference; typedef const T& const_reference; typedef std::size_t size_type; typedef std::ptrdiff_t difference_type; __forceinline os_allocator () : hugepages(false) {} __forceinline pointer allocate( size_type n ) { return (pointer) os_malloc(n*sizeof(value_type),hugepages); } __forceinline void deallocate( pointer p, size_type n ) { return os_free(p,n*sizeof(value_type),hugepages); } __forceinline void construct( pointer p, const_reference val ) { new (p) T(val); } __forceinline void destroy( pointer p ) { p->~T(); } bool hugepages; }; /*! allocator that newer performs allocations */ template struct no_allocator { typedef T value_type; typedef T* pointer; typedef const T* const_pointer; typedef T& reference; typedef const T& const_reference; typedef std::size_t size_type; typedef std::ptrdiff_t difference_type; __forceinline pointer allocate( size_type n ) { throw std::runtime_error("no allocation supported"); } __forceinline void deallocate( pointer p, size_type n ) { } __forceinline void construct( pointer p, const_reference val ) { new (p) T(val); } __forceinline void destroy( pointer p ) { p->~T(); } }; /*! 
allocator for IDs */ template struct IDPool { typedef T value_type; IDPool () : nextID(0) {} T allocate() { /* return ID from list */ if (!IDs.empty()) { T id = *IDs.begin(); IDs.erase(IDs.begin()); return id; } /* allocate new ID */ else { if (size_t(nextID)+1 > max_id) return -1; return nextID++; } } /* adds an ID provided by the user */ bool add(T id) { if (id > max_id) return false; /* check if ID should be in IDs set */ if (id < nextID) { auto p = IDs.find(id); if (p == IDs.end()) return false; IDs.erase(p); return true; } /* otherwise increase ID set */ else { for (T i=nextID; i IDs; //!< stores deallocated IDs to be reused T nextID; //!< next ID to use when IDs vector is empty }; } level-zero-raytracing-support-1.2.3/rtbuild/sys/array.h000066400000000000000000000152551514453371700232500ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "platform.h" #include "alloc.h" namespace embree { /*! static array with static size */ template class array_t { public: /********************** Iterators ****************************/ __forceinline T* begin() const { return items; }; __forceinline T* end () const { return items+N; }; /********************** Capacity ****************************/ __forceinline bool empty () const { return N == 0; } __forceinline size_t size () const { return N; } __forceinline size_t max_size () const { return N; } /******************** Element access **************************/ __forceinline T& operator[](size_t i) { assert(i < N); return items[i]; } __forceinline const T& operator[](size_t i) const { assert(i < N); return items[i]; } __forceinline T& at(size_t i) { assert(i < N); return items[i]; } __forceinline const T& at(size_t i) const { assert(i < N); return items[i]; } __forceinline T& front() const { assert(N > 0); return items[0]; }; __forceinline T& back () const { assert(N > 0); return items[N-1]; }; __forceinline T* data() { return items; }; 
__forceinline const T* data() const { return items; }; private: T items[N]; }; /*! static array with dynamic size */ template class darray_t { public: __forceinline darray_t () : M(0) {} __forceinline darray_t (const T& v) : M(0) { for (size_t i=0; i 0); return items[0]; }; __forceinline T& back () { assert(M > 0); return items[M-1]; }; __forceinline T* data() { return items; }; __forceinline const T* data() const { return items; }; private: size_t M; T items[N]; }; /*! dynamic sized array that is allocated on the stack */ #define dynamic_large_stack_array(Ty,Name,N,max_stack_bytes) StackArray Name(N) template struct __aligned(64) StackArray { __forceinline StackArray (const size_t N) : N(N) { if (N*sizeof(Ty) <= max_stack_bytes) data = &arr[0]; else data = (Ty*) alignedMalloc(N*sizeof(Ty),64); } __forceinline ~StackArray () { if (data != &arr[0]) alignedFree(data); } __forceinline operator Ty* () { return data; } __forceinline operator const Ty* () const { return data; } __forceinline Ty& operator[](const int i) { assert(i>=0 && i=0 && i struct __aligned(64) DynamicStackArray { __forceinline DynamicStackArray () : data(&arr[0]) {} __forceinline ~DynamicStackArray () { if (!isStackAllocated()) delete[] data; } __forceinline bool isStackAllocated() const { return data == &arr[0]; } __forceinline size_t size() const { if (isStackAllocated()) return max_stack_elements; else return max_total_elements; } __forceinline void resize(size_t M) { assert(M <= max_total_elements); if (likely(M <= max_stack_elements)) return; if (likely(!isStackAllocated())) return; data = new Ty[max_total_elements]; for (size_t i=0; i=0 && ioperator[] (i) = other[i]; } DynamicStackArray& operator= (const DynamicStackArray& other) { for (size_t i=0; ioperator[] (i) = other[i]; return *this; } private: Ty arr[max_stack_elements]; Ty* data; }; } level-zero-raytracing-support-1.2.3/rtbuild/sys/intrinsics.h000066400000000000000000000135451514453371700243170ustar00rootroot00000000000000// Copyright 
2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "platform.h" #if defined(__WIN32__) #include #endif #include #if defined(__WIN32__) # if !defined(NOMINMAX) # define NOMINMAX # endif # include #endif /* normally defined in pmmintrin.h, but we always need this */ #if !defined(_MM_SET_DENORMALS_ZERO_MODE) #define _MM_DENORMALS_ZERO_ON (0x0040) #define _MM_DENORMALS_ZERO_OFF (0x0000) #define _MM_DENORMALS_ZERO_MASK (0x0040) #define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x))) #endif namespace embree { //////////////////////////////////////////////////////////////////////////////// /// Windows Platform //////////////////////////////////////////////////////////////////////////////// #if defined(__WIN32__) && !defined(__INTEL_LLVM_COMPILER) __forceinline int bsf(int v) { unsigned long r = 0; _BitScanForward(&r,v); return r; } __forceinline unsigned bsf(unsigned v) { unsigned long r = 0; _BitScanForward(&r,v); return r; } #if defined(__X86_64__) __forceinline size_t bsf(size_t v) { unsigned long r = 0; _BitScanForward64(&r,v); return r; } #endif __forceinline int bscf(int& v) { int i = bsf(v); v &= v-1; return i; } __forceinline unsigned bscf(unsigned& v) { unsigned i = bsf(v); v &= v-1; return i; } #if defined(__X86_64__) __forceinline size_t bscf(size_t& v) { size_t i = bsf(v); v &= v-1; return i; } #endif __forceinline int bsr(int v) { unsigned long r = 0; _BitScanReverse(&r,v); return r; } __forceinline unsigned bsr(unsigned v) { unsigned long r = 0; _BitScanReverse(&r,v); return r; } #if defined(__X86_64__) __forceinline size_t bsr(size_t v) { unsigned long r = 0; _BitScanReverse64(&r, v); return r; } #endif __forceinline int lzcnt(const int x) { if (unlikely(x == 0)) return 32; return 31 - bsr(x); } //////////////////////////////////////////////////////////////////////////////// /// Unix Platform //////////////////////////////////////////////////////////////////////////////// 
#else __forceinline uint64_t read_tsc() { #if defined(__X86_ASM__) uint32_t high,low; asm volatile ("rdtsc" : "=d"(high), "=a"(low)); return (((uint64_t)high) << 32) + (uint64_t)low; #else /* Not supported yet, meaning measuring traversal cost per pixel does not work. */ return 0; #endif } __forceinline int bsf(int v) { #if defined(__X86_ASM__) int r = 0; asm ("bsf %1,%0" : "=r"(r) : "r"(v)); return r; #else return __builtin_ctz(v); #endif } #if defined(__64BIT__) __forceinline unsigned bsf(unsigned v) { #if defined(__X86_ASM__) unsigned r = 0; asm ("bsf %1,%0" : "=r"(r) : "r"(v)); return r; #else return __builtin_ctz(v); #endif } #endif __forceinline size_t bsf(size_t v) { #if defined(__X86_ASM__) size_t r = 0; asm ("bsf %1,%0" : "=r"(r) : "r"(v)); return r; #else return __builtin_ctzl(v); #endif } __forceinline int bscf(int& v) { int i = bsf(v); v &= v-1; return i; } #if defined(__64BIT__) __forceinline unsigned int bscf(unsigned int& v) { unsigned int i = bsf(v); v &= v-1; return i; } #endif __forceinline size_t bscf(size_t& v) { size_t i = bsf(v); v &= v-1; return i; } __forceinline int bsr(int v) { #if defined(__X86_ASM__) int r = 0; asm ("bsr %1,%0" : "=r"(r) : "r"(v)); return r; #else return __builtin_clz(v) ^ 31; #endif } #if defined(__64BIT__) __forceinline unsigned bsr(unsigned v) { #if defined(__X86_ASM__) unsigned r = 0; asm ("bsr %1,%0" : "=r"(r) : "r"(v)); return r; #else return __builtin_clz(v) ^ 31; #endif } #endif __forceinline size_t bsr(size_t v) { #if defined(__X86_ASM__) size_t r = 0; asm ("bsr %1,%0" : "=r"(r) : "r"(v)); return r; #else return (sizeof(v) * 8 - 1) - __builtin_clzl(v); #endif } __forceinline int lzcnt(const int x) { if (unlikely(x == 0)) return 32; return 31 - bsr(x); } #endif #if !defined(__WIN32__) #if defined(__i386__) && defined(__PIC__) __forceinline void __cpuid(int out[4], int op) { asm volatile ("xchg{l}\t{%%}ebx, %1\n\t" "cpuid\n\t" "xchg{l}\t{%%}ebx, %1\n\t" : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3]) : 
"0"(op)); } __forceinline void __cpuid_count(int out[4], int op1, int op2) { asm volatile ("xchg{l}\t{%%}ebx, %1\n\t" "cpuid\n\t" "xchg{l}\t{%%}ebx, %1\n\t" : "=a" (out[0]), "=r" (out[1]), "=c" (out[2]), "=d" (out[3]) : "0" (op1), "2" (op2)); } #elif defined(__X86_ASM__) __forceinline void __cpuid(int out[4], int op) { asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op)); } __forceinline void __cpuid_count(int out[4], int op1, int op2) { asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op1), "c"(op2)); } #endif #endif //////////////////////////////////////////////////////////////////////////////// /// All Platforms //////////////////////////////////////////////////////////////////////////////// #if defined(__clang__) || defined(__GNUC__) #if !defined(_mm_undefined_ps) __forceinline __m128 _mm_undefined_ps() { return _mm_setzero_ps(); } #endif #if !defined(_mm_undefined_si128) __forceinline __m128i _mm_undefined_si128() { return _mm_setzero_si128(); } #endif #endif #if defined(__SSE4_2__) __forceinline int popcnt(int in) { return _mm_popcnt_u32(in); } __forceinline unsigned popcnt(unsigned in) { return _mm_popcnt_u32(in); } #if defined(__64BIT__) __forceinline size_t popcnt(size_t in) { return _mm_popcnt_u64(in); } #endif #endif } level-zero-raytracing-support-1.2.3/rtbuild/sys/platform.h000066400000000000000000000533531514453371700237570ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #if !defined(_CRT_SECURE_NO_WARNINGS) #define _CRT_SECURE_NO_WARNINGS #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(EMBREE_SYCL_SUPPORT) #define __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wdeprecated-declarations" #pragma clang diagnostic ignored "-W#pragma-messages" #include #pragma clang 
diagnostic pop #define SYCL_ONEAPI sycl #define SYCL_EXT_ONEAPI sycl::ext::oneapi #define SYCL_SUBGROUP sycl #define SYCL_EXPERIMENTAL sycl::ext::oneapi::experimental #define SYCL_INTEL sycl::ext::intel #define SYCL_CTZ sycl #include "sycl.h" #if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__) #define CONSTANT __attribute__((opencl_constant)) #else #define CONSTANT #endif #endif //////////////////////////////////////////////////////////////////////////////// /// detect platform //////////////////////////////////////////////////////////////////////////////// /* detect 32 or 64 Intel platform */ #if defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) #define __X86_64__ #define __X86_ASM__ #elif defined(__i386__) || defined(_M_IX86) #define __X86_ASM__ #endif /* detect 64 bit platform */ #if defined(__X86_64__) #define __64BIT__ #endif /* detect Linux platform */ #if defined(linux) || defined(__linux__) || defined(__LINUX__) # if !defined(__LINUX__) # define __LINUX__ # endif # if !defined(__UNIX__) # define __UNIX__ # endif #endif /* detect FreeBSD platform */ #if defined(__FreeBSD__) || defined(__FREEBSD__) # if !defined(__FREEBSD__) # define __FREEBSD__ # endif # if !defined(__UNIX__) # define __UNIX__ # endif #endif /* detect Windows 95/98/NT/2000/XP/Vista/7/8/10 platform */ #if (defined(WIN32) || defined(_WIN32) || defined(__WIN32__) || defined(__NT__)) && !defined(__CYGWIN__) # if !defined(__WIN32__) # define __WIN32__ # endif #endif /* detect Cygwin platform */ #if defined(__CYGWIN__) # if !defined(__UNIX__) # define __UNIX__ # endif #endif /* detect MAC OS X platform */ #if defined(__APPLE__) || defined(MACOSX) || defined(__MACOSX__) # if !defined(__MACOSX__) # define __MACOSX__ # endif # if !defined(__UNIX__) # define __UNIX__ # endif #endif /* try to detect other Unix systems */ #if defined(__unix__) || defined (unix) || defined(__unix) || defined(_unix) # if !defined(__UNIX__) # define __UNIX__ # endif #endif 
//////////////////////////////////////////////////////////////////////////////// /// Macros //////////////////////////////////////////////////////////////////////////////// #ifdef __WIN32__ # if defined(EMBREE_STATIC_LIB) # define dll_export # define dll_import # else # define dll_export __declspec(dllexport) # define dll_import __declspec(dllimport) # endif #else # define dll_export __attribute__ ((visibility ("default"))) # define dll_import #endif #if defined(__WIN32__) && !defined(__MINGW32__) #if !defined(__noinline) #define __noinline __declspec(noinline) #endif //#define __forceinline __forceinline //#define __restrict __restrict #if defined(__INTEL_COMPILER) #define __restrict__ __restrict #else #define __restrict__ //__restrict // causes issues with MSVC #endif #if !defined(__thread) && !defined(__INTEL_LLVM_COMPILER) #define __thread __declspec(thread) #endif #if !defined(__aligned) #define __aligned(...) __declspec(align(__VA_ARGS__)) #endif //#define __FUNCTION__ __FUNCTION__ #define debugbreak() __debugbreak() #else #if !defined(__noinline) #define __noinline __attribute__((noinline)) #endif #if !defined(__forceinline) #define __forceinline inline __attribute__((always_inline)) #endif //#define __restrict __restrict //#define __thread __thread #if !defined(__aligned) #define __aligned(...) 
__attribute__((aligned(__VA_ARGS__))) #endif #if !defined(__FUNCTION__) #define __FUNCTION__ __PRETTY_FUNCTION__ #endif #define debugbreak() asm ("int $3") #endif #if defined(__clang__) || defined(__GNUC__) #define MAYBE_UNUSED __attribute__((unused)) #else #define MAYBE_UNUSED #endif #if !defined(_unused) #define _unused(x) ((void)(x)) #endif #if defined(_MSC_VER) && (_MSC_VER < 1900) // before VS2015 deleted functions are not supported properly #define DELETED #else #define DELETED = delete #endif #if !defined(likely) #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) || defined(__SYCL_DEVICE_ONLY__) #define likely(expr) (expr) #define unlikely(expr) (expr) #else #define likely(expr) __builtin_expect((bool)(expr),true ) #define unlikely(expr) __builtin_expect((bool)(expr),false) #endif #endif //////////////////////////////////////////////////////////////////////////////// /// Error handling and debugging //////////////////////////////////////////////////////////////////////////////// /* debug printing macros */ #define STRING(x) #x #define TOSTRING(x) STRING(x) #define PING embree_cout_uniform << __FILE__ << " (" << __LINE__ << "): " << __FUNCTION__ << embree_endl #define PRINT(x) embree_cout << STRING(x) << " = " << (x) << embree_endl #define PRINT2(x,y) embree_cout << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << embree_endl #define PRINT3(x,y,z) embree_cout << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << embree_endl #define PRINT4(x,y,z,w) embree_cout << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << ", " << STRING(w) << " = " << (w) << embree_endl #define UPRINT(x) embree_cout_uniform << STRING(x) << " = " << (x) << embree_endl #define UPRINT2(x,y) embree_cout_uniform << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << embree_endl #define UPRINT3(x,y,z) embree_cout_uniform << STRING(x) << " = " << (x) << ", " 
<< STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << embree_endl #define UPRINT4(x,y,z,w) embree_cout_uniform << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << ", " << STRING(w) << " = " << (w) << embree_endl #if defined(DEBUG) // only report file and line in debug mode #define THROW_RUNTIME_ERROR(str) \ throw std::runtime_error(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str)); #else #define THROW_RUNTIME_ERROR(str) \ throw std::runtime_error(str); #endif #define FATAL(x) THROW_RUNTIME_ERROR(x) #define WARNING(x) { std::cerr << "Warning: " << x << embree_endl << std::flush; } #define NOT_IMPLEMENTED FATAL(std::string(__FUNCTION__) + " not implemented") //////////////////////////////////////////////////////////////////////////////// /// Basic types //////////////////////////////////////////////////////////////////////////////// /* default floating-point type */ namespace embree { typedef float real; } /* windows does not have ssize_t */ #if defined(__WIN32__) #if defined(__64BIT__) typedef int64_t ssize_t; #else typedef int32_t ssize_t; #endif #endif //////////////////////////////////////////////////////////////////////////////// /// Basic utility functions //////////////////////////////////////////////////////////////////////////////// __forceinline std::string toString(long long value) { return std::to_string(value); } //////////////////////////////////////////////////////////////////////////////// /// Disable some compiler warnings //////////////////////////////////////////////////////////////////////////////// #if defined(__INTEL_COMPILER) //#pragma warning(disable:265 ) // floating-point operation result is out of range //#pragma warning(disable:383 ) // value copied to temporary, reference to temporary used //#pragma warning(disable:869 ) // parameter was never referenced //#pragma warning(disable:981 ) // operands are evaluated in unspecified order //#pragma 
warning(disable:1418) // external function definition with no prior declaration //#pragma warning(disable:1419) // external declaration in primary source file //#pragma warning(disable:1572) // floating-point equality and inequality comparisons are unreliable //#pragma warning(disable:94 ) // the size of an array must be greater than zero //#pragma warning(disable:1599) // declaration hides parameter //#pragma warning(disable:424 ) // extra ";" ignored #pragma warning(disable:2196) // routine is both "inline" and "noinline" //#pragma warning(disable:177 ) // label was declared but never referenced //#pragma warning(disable:114 ) // function was referenced but not defined //#pragma warning(disable:819 ) // template nesting depth does not match the previous declaration of function #pragma warning(disable:15335) // was not vectorized: vectorization possible but seems inefficient #endif #if defined(_MSC_VER) //#pragma warning(disable:4200) // nonstandard extension used : zero-sized array in struct/union #pragma warning(disable:4800) // forcing value to bool 'true' or 'false' (performance warning) //#pragma warning(disable:4267) // '=' : conversion from 'size_t' to 'unsigned long', possible loss of data #pragma warning(disable:4244) // 'argument' : conversion from 'ssize_t' to 'unsigned int', possible loss of data #pragma warning(disable:4267) // conversion from 'size_t' to 'const int', possible loss of data //#pragma warning(disable:4355) // 'this' : used in base member initializer list //#pragma warning(disable:391 ) // '<=' : signed / unsigned mismatch //#pragma warning(disable:4018) // '<' : signed / unsigned mismatch //#pragma warning(disable:4305) // 'initializing' : truncation from 'double' to 'float' //#pragma warning(disable:4068) // unknown pragma //#pragma warning(disable:4146) // unary minus operator applied to unsigned type, result still unsigned //#pragma warning(disable:4838) // conversion from 'unsigned int' to 'const int' requires a narrowing 
conversion) //#pragma warning(disable:4227) // anachronism used : qualifiers on reference are ignored #pragma warning(disable:4503) // decorated name length exceeded, name was truncated #pragma warning(disable:4180) // qualifier applied to function type has no meaning; ignored #pragma warning(disable:4258) // definition from the for loop is ignored; the definition from the enclosing scope is used # if _MSC_VER < 1910 // prior to Visual studio 2017 (V141) # pragma warning(disable:4101) // warning C4101: 'x': unreferenced local variable // a compiler bug issues wrong warnings # pragma warning(disable:4789) // buffer '' of size 8 bytes will be overrun; 32 bytes will be written starting at offset 0 # endif #endif #if defined(__clang__) && !defined(__INTEL_COMPILER) //#pragma clang diagnostic ignored "-Wunknown-pragmas" //#pragma clang diagnostic ignored "-Wunused-variable" //#pragma clang diagnostic ignored "-Wreorder" //#pragma clang diagnostic ignored "-Wmicrosoft" //#pragma clang diagnostic ignored "-Wunused-private-field" //#pragma clang diagnostic ignored "-Wunused-local-typedef" //#pragma clang diagnostic ignored "-Wunused-function" //#pragma clang diagnostic ignored "-Wnarrowing" //#pragma clang diagnostic ignored "-Wc++11-narrowing" //#pragma clang diagnostic ignored "-Wdeprecated-register" //#pragma clang diagnostic ignored "-Wdeprecated-declarations" #endif #if defined(__GNUC__) && !defined(__INTEL_COMPILER) && !defined(__clang__) #pragma GCC diagnostic ignored "-Wpragmas" //#pragma GCC diagnostic ignored "-Wnarrowing" #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" //#pragma GCC diagnostic ignored "-Wdeprecated-declarations" //#pragma GCC diagnostic ignored "-Warray-bounds" #pragma GCC diagnostic ignored "-Wattributes" #pragma GCC diagnostic ignored "-Wmisleading-indentation" #pragma GCC diagnostic ignored "-Wsign-compare" #pragma GCC diagnostic ignored "-Wparentheses" #endif #if defined(__clang__) && defined(__WIN32__) #pragma clang diagnostic 
ignored "-Wunused-parameter" #pragma clang diagnostic ignored "-Wmicrosoft-cast" #pragma clang diagnostic ignored "-Wmicrosoft-enum-value" #pragma clang diagnostic ignored "-Wmicrosoft-include" #pragma clang diagnostic ignored "-Wunused-function" #pragma clang diagnostic ignored "-Wunknown-pragmas" #endif /* disabling deprecated warning, please use only where use of deprecated Embree API functions is desired */ #if defined(__WIN32__) && defined(__INTEL_COMPILER) #define DISABLE_DEPRECATED_WARNING __pragma(warning (disable: 1478)) // warning: function was declared deprecated #define ENABLE_DEPRECATED_WARNING __pragma(warning (enable: 1478)) // warning: function was declared deprecated #elif defined(__INTEL_COMPILER) #define DISABLE_DEPRECATED_WARNING _Pragma("warning (disable: 1478)") // warning: function was declared deprecated #define ENABLE_DEPRECATED_WARNING _Pragma("warning (enable : 1478)") // warning: function was declared deprecated #elif defined(__clang__) #define DISABLE_DEPRECATED_WARNING _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") // warning: xxx is deprecated #define ENABLE_DEPRECATED_WARNING _Pragma("clang diagnostic warning \"-Wdeprecated-declarations\"") // warning: xxx is deprecated #elif defined(__GNUC__) #define DISABLE_DEPRECATED_WARNING _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") // warning: xxx is deprecated #define ENABLE_DEPRECATED_WARNING _Pragma("GCC diagnostic warning \"-Wdeprecated-declarations\"") // warning: xxx is deprecated #elif defined(_MSC_VER) #define DISABLE_DEPRECATED_WARNING __pragma(warning (disable: 4996)) // warning: function was declared deprecated #define ENABLE_DEPRECATED_WARNING __pragma(warning (enable : 4996)) // warning: function was declared deprecated #endif //////////////////////////////////////////////////////////////////////////////// /// SYCL specific //////////////////////////////////////////////////////////////////////////////// #if defined(EMBREE_SYCL_SUPPORT) && 
defined(__SYCL_DEVICE_ONLY__) #define sycl_printf0(format, ...) { \ static const CONSTANT char fmt[] = format; \ if (get_sub_group_local_id() == SYCL_CTZ::ctz(intel_sub_group_ballot(true))) \ SYCL_EXT_ONEAPI::experimental::printf(fmt, __VA_ARGS__ ); \ } #define sycl_printf0_(format) { \ static const CONSTANT char fmt[] = format; \ if (get_sub_group_local_id() == SYCL_CTZ::ctz(intel_sub_group_ballot(true))) \ SYCL_EXT_ONEAPI::experimental::printf(fmt); \ } #else #define sycl_printf0(format, ...) { \ static const CONSTANT char fmt[] = format; \ SYCL_EXT_ONEAPI::experimental::printf(fmt, __VA_ARGS__ ); \ } #define sycl_printf0_(format) { \ static const CONSTANT char fmt[] = format; \ SYCL_EXT_ONEAPI::experimental::printf(fmt); \ } #endif #define sycl_printf(format, ...) { \ static const CONSTANT char fmt[] = format; \ SYCL_EXT_ONEAPI::experimental::printf(fmt, __VA_ARGS__ ); \ } #define sycl_printf_(format) { \ static const CONSTANT char fmt[] = format; \ SYCL_EXT_ONEAPI::experimental::printf(fmt); \ } #if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__) namespace embree { struct sycl_ostream_ { sycl_ostream_ (bool uniform) : uniform(uniform) {} bool uniform = false; }; struct sycl_endl_ {}; #define embree_ostream embree::sycl_ostream_ #define embree_cout embree::sycl_ostream_(false) #define embree_cout_uniform embree::sycl_ostream_(true) #define embree_endl embree::sycl_endl_() inline sycl_ostream_ operator <<(sycl_ostream_ cout, int i) { if (cout.uniform) { if (get_sub_group_local_id() == SYCL_CTZ::ctz(intel_sub_group_ballot(true))) sycl_printf("%i",i); } else sycl_printf("%i ",i); return cout; } inline sycl_ostream_ operator <<(sycl_ostream_ cout, unsigned int i) { if (cout.uniform) { if (get_sub_group_local_id() == SYCL_CTZ::ctz(intel_sub_group_ballot(true))) sycl_printf("%u",i); } else sycl_printf("%u ",i); return cout; } #if defined(__WIN32__) and defined(__INTEL_LLVM_COMPILER) inline sycl_ostream_ operator <<(sycl_ostream_ cout, size_t i) { if 
(cout.uniform) { if (get_sub_group_local_id() == SYCL_CTZ::ctz(intel_sub_group_ballot(true))) sycl_printf("%i",i); } else { sycl_printf("%i ",i); } return cout; } #endif inline sycl_ostream_ operator <<(sycl_ostream_ cout, float f) { if (cout.uniform) { if (get_sub_group_local_id() == SYCL_CTZ::ctz(intel_sub_group_ballot(true))) sycl_printf("%f",f); } else sycl_printf("%f ",f); return cout; } inline sycl_ostream_ operator <<(sycl_ostream_ cout, double d) { if (cout.uniform) { if (get_sub_group_local_id() == SYCL_CTZ::ctz(intel_sub_group_ballot(true))) sycl_printf("%f",d); } else sycl_printf("%f ",d); return cout; } inline sycl_ostream_ operator <<(sycl_ostream_ cout, ulong l) { if (cout.uniform) { if (get_sub_group_local_id() == SYCL_CTZ::ctz(intel_sub_group_ballot(true))) sycl_printf("%lu",l); } else sycl_printf("%lu ",l); return cout; } inline sycl_ostream_ operator <<(sycl_ostream_ cout, long l) { if (cout.uniform) { if (get_sub_group_local_id() == SYCL_CTZ::ctz(intel_sub_group_ballot(true))) sycl_printf("%l",l); } else sycl_printf("%l ",l); return cout; } inline sycl_ostream_ operator <<(sycl_ostream_ cout, void* p) { if (cout.uniform) { if (get_sub_group_local_id() == SYCL_CTZ::ctz(intel_sub_group_ballot(true))) sycl_printf("%p",p); } else sycl_printf("%p ",p); return cout; } inline sycl_ostream_ operator <<(sycl_ostream_ cout, const char* c) { if (get_sub_group_local_id() == SYCL_CTZ::ctz(intel_sub_group_ballot(true))) sycl_printf("%s",c); return cout; } inline sycl_ostream_ operator <<(sycl_ostream_ cout, sycl_endl_) { if (get_sub_group_local_id() == SYCL_CTZ::ctz(intel_sub_group_ballot(true))) sycl_printf_("\n"); return cout; } } #else #define embree_ostream std::ostream& #define embree_cout std::cout #define embree_cout_uniform std::cout #define embree_endl std::endl #endif #if defined(EMBREE_SYCL_SUPPORT) /* printing out sycle vector types */ __forceinline embree_ostream operator<<(embree_ostream out, const sycl::float4& v) { return out << "(" << v.x() << 
"," << v.y() << "," << v.z() << "," << v.w() << ")"; } __forceinline embree_ostream operator<<(embree_ostream out, const sycl::float3& v) { return out << "(" << v.x() << "," << v.y() << "," << v.z() << ")"; } __forceinline embree_ostream operator<<(embree_ostream out, const sycl::float2& v) { return out << "(" << v.x() << "," << v.y() << ")"; } __forceinline embree_ostream operator<<(embree_ostream out, const sycl::int4& v) { return out << "(" << v.x() << "," << v.y() << "," << v.z() << "," << v.w() << ")"; } __forceinline embree_ostream operator<<(embree_ostream out, const sycl::int3& v) { return out << "(" << v.x() << "," << v.y() << "," << v.z() << ")"; } __forceinline embree_ostream operator<<(embree_ostream out, const sycl::int2& v) { return out << "(" << v.x() << "," << v.y() << ")"; } __forceinline embree_ostream operator<<(embree_ostream out, const sycl::uint4& v) { return out << "(" << v.x() << "," << v.y() << "," << v.z() << "," << v.w() << ")"; } __forceinline embree_ostream operator<<(embree_ostream out, const sycl::uint3& v) { return out << "(" << v.x() << "," << v.y() << "," << v.z() << ")"; } __forceinline embree_ostream operator<<(embree_ostream out, const sycl::uint2& v) { return out << "(" << v.x() << "," << v.y() << ")"; } #endif inline void tab(std::ostream& cout, int n) { for (int i=0; i struct OnScopeExitHelper { OnScopeExitHelper (const Closure f) : active(true), f(f) {} ~OnScopeExitHelper() { if (active) f(); } void deactivate() { active = false; } bool active; const Closure f; }; template OnScopeExitHelper OnScopeExit(const Closure f) { return OnScopeExitHelper(f); } #define STRING_JOIN2(arg1, arg2) DO_STRING_JOIN2(arg1, arg2) #define DO_STRING_JOIN2(arg1, arg2) arg1 ## arg2 #define ON_SCOPE_EXIT(code) \ auto STRING_JOIN2(on_scope_exit_, __LINE__) = OnScopeExit([&](){code;}) template std::unique_ptr make_unique(Ty* ptr) { return std::unique_ptr(ptr); } } 
level-zero-raytracing-support-1.2.3/rtbuild/sys/sysinfo.cpp000066400000000000000000000526661514453371700241660ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #if defined(__INTEL_LLVM_COMPILER) // prevents "'__thiscall' calling convention is not supported for this target" warning from TBB #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wignored-attributes" #endif #include "sysinfo.h" #include "intrinsics.h" #if defined(__FREEBSD__) #include #include typedef cpuset_t cpu_set_t; #endif //////////////////////////////////////////////////////////////////////////////// /// All Platforms //////////////////////////////////////////////////////////////////////////////// namespace embree { //NullTy null; std::string getPlatformName() { #if defined(__ANDROID__) && !defined(__64BIT__) return "Android (32bit)"; #elif defined(__ANDROID__) && defined(__64BIT__) return "Android (64bit)"; #elif defined(__LINUX__) && !defined(__64BIT__) return "Linux (32bit)"; #elif defined(__LINUX__) && defined(__64BIT__) return "Linux (64bit)"; #elif defined(__FREEBSD__) && !defined(__64BIT__) return "FreeBSD (32bit)"; #elif defined(__FREEBSD__) && defined(__64BIT__) return "FreeBSD (64bit)"; #elif defined(__CYGWIN__) && !defined(__64BIT__) return "Cygwin (32bit)"; #elif defined(__CYGWIN__) && defined(__64BIT__) return "Cygwin (64bit)"; #elif defined(__WIN32__) && !defined(__64BIT__) return "Windows (32bit)"; #elif defined(__WIN32__) && defined(__64BIT__) return "Windows (64bit)"; #elif defined(__MACOSX__) && !defined(__64BIT__) return "Mac OS X (32bit)"; #elif defined(__MACOSX__) && defined(__64BIT__) return "Mac OS X (64bit)"; #elif defined(__UNIX__) && !defined(__64BIT__) return "Unix (32bit)"; #elif defined(__UNIX__) && defined(__64BIT__) return "Unix (64bit)"; #else return "Unknown"; #endif } std::string getCompilerName() { #if defined(__INTEL_COMPILER) int icc_mayor = __INTEL_COMPILER / 100 % 100; int icc_minor = 
__INTEL_COMPILER % 100; std::string version = "Intel Compiler "; version += toString(icc_mayor); version += "." + toString(icc_minor); #if defined(__INTEL_COMPILER_UPDATE) version += "." + toString(__INTEL_COMPILER_UPDATE); #endif return version; #elif defined(__clang__) return "CLANG " __clang_version__; #elif defined (__GNUC__) return "GCC " __VERSION__; #elif defined(_MSC_VER) std::string version = toString(_MSC_FULL_VER); version.insert(4,"."); version.insert(9,"."); version.insert(2,"."); return "Visual C++ Compiler " + version; #else return "Unknown Compiler"; #endif } std::string getCPUVendor() { #if defined(__X86_ASM__) int cpuinfo[4]; __cpuid (cpuinfo, 0); int name[4]; name[0] = cpuinfo[1]; name[1] = cpuinfo[3]; name[2] = cpuinfo[2]; name[3] = 0; return (char*)name; #elif defined(__ARM_NEON) return "ARM"; #else return "Unknown"; #endif } CPU getCPUModel() { #if defined(__X86_ASM__) if (getCPUVendor() != "GenuineIntel") return CPU::UNKNOWN; int out[4]; __cpuid(out, 0); if (out[0] < 1) return CPU::UNKNOWN; __cpuid(out, 1); /* please see CPUID documentation for these formulas */ uint32_t family_ID = (out[0] >> 8) & 0x0F; uint32_t extended_family_ID = (out[0] >> 20) & 0xFF; uint32_t model_ID = (out[0] >> 4) & 0x0F; uint32_t extended_model_ID = (out[0] >> 16) & 0x0F; uint32_t DisplayFamily = family_ID; if (family_ID == 0x0F) DisplayFamily += extended_family_ID; uint32_t DisplayModel = model_ID; if (family_ID == 0x06 || family_ID == 0x0F) DisplayModel += extended_model_ID << 4; uint32_t DisplayFamily_DisplayModel = (DisplayFamily << 8) + (DisplayModel << 0); // Data from Intel® 64 and IA-32 Architectures, Volume 4, Chapter 2, Table 2-1 (CPUID Signature Values of DisplayFamily_DisplayModel) if (DisplayFamily_DisplayModel == 0x067D) return CPU::CORE_ICE_LAKE; if (DisplayFamily_DisplayModel == 0x067E) return CPU::CORE_ICE_LAKE; if (DisplayFamily_DisplayModel == 0x068C) return CPU::CORE_TIGER_LAKE; if (DisplayFamily_DisplayModel == 0x06A5) return 
CPU::CORE_COMET_LAKE; if (DisplayFamily_DisplayModel == 0x06A6) return CPU::CORE_COMET_LAKE; if (DisplayFamily_DisplayModel == 0x0666) return CPU::CORE_CANNON_LAKE; if (DisplayFamily_DisplayModel == 0x068E) return CPU::CORE_KABY_LAKE; if (DisplayFamily_DisplayModel == 0x069E) return CPU::CORE_KABY_LAKE; if (DisplayFamily_DisplayModel == 0x066A) return CPU::XEON_ICE_LAKE; if (DisplayFamily_DisplayModel == 0x066C) return CPU::XEON_ICE_LAKE; if (DisplayFamily_DisplayModel == 0x0655) return CPU::XEON_SKY_LAKE; if (DisplayFamily_DisplayModel == 0x064E) return CPU::CORE_SKY_LAKE; if (DisplayFamily_DisplayModel == 0x065E) return CPU::CORE_SKY_LAKE; if (DisplayFamily_DisplayModel == 0x0656) return CPU::XEON_BROADWELL; if (DisplayFamily_DisplayModel == 0x064F) return CPU::XEON_BROADWELL; if (DisplayFamily_DisplayModel == 0x0647) return CPU::CORE_BROADWELL; if (DisplayFamily_DisplayModel == 0x063D) return CPU::CORE_BROADWELL; if (DisplayFamily_DisplayModel == 0x063F) return CPU::XEON_HASWELL; if (DisplayFamily_DisplayModel == 0x063C) return CPU::CORE_HASWELL; if (DisplayFamily_DisplayModel == 0x0645) return CPU::CORE_HASWELL; if (DisplayFamily_DisplayModel == 0x0646) return CPU::CORE_HASWELL; if (DisplayFamily_DisplayModel == 0x063E) return CPU::XEON_IVY_BRIDGE; if (DisplayFamily_DisplayModel == 0x063A) return CPU::CORE_IVY_BRIDGE; if (DisplayFamily_DisplayModel == 0x062D) return CPU::SANDY_BRIDGE; if (DisplayFamily_DisplayModel == 0x062F) return CPU::SANDY_BRIDGE; if (DisplayFamily_DisplayModel == 0x062A) return CPU::SANDY_BRIDGE; if (DisplayFamily_DisplayModel == 0x062E) return CPU::NEHALEM; if (DisplayFamily_DisplayModel == 0x0625) return CPU::NEHALEM; if (DisplayFamily_DisplayModel == 0x062C) return CPU::NEHALEM; if (DisplayFamily_DisplayModel == 0x061E) return CPU::NEHALEM; if (DisplayFamily_DisplayModel == 0x061F) return CPU::NEHALEM; if (DisplayFamily_DisplayModel == 0x061A) return CPU::NEHALEM; if (DisplayFamily_DisplayModel == 0x061D) return CPU::NEHALEM; if 
(DisplayFamily_DisplayModel == 0x0617) return CPU::CORE2; if (DisplayFamily_DisplayModel == 0x060F) return CPU::CORE2; if (DisplayFamily_DisplayModel == 0x060E) return CPU::CORE1; if (DisplayFamily_DisplayModel == 0x0685) return CPU::XEON_PHI_KNIGHTS_MILL; if (DisplayFamily_DisplayModel == 0x0657) return CPU::XEON_PHI_KNIGHTS_LANDING; #elif defined(__ARM_NEON) return CPU::ARM; #endif return CPU::UNKNOWN; } std::string stringOfCPUModel(CPU model) { switch (model) { case CPU::XEON_ICE_LAKE : return "Xeon Ice Lake"; case CPU::CORE_ICE_LAKE : return "Core Ice Lake"; case CPU::CORE_TIGER_LAKE : return "Core Tiger Lake"; case CPU::CORE_COMET_LAKE : return "Core Comet Lake"; case CPU::CORE_CANNON_LAKE : return "Core Cannon Lake"; case CPU::CORE_KABY_LAKE : return "Core Kaby Lake"; case CPU::XEON_SKY_LAKE : return "Xeon Sky Lake"; case CPU::CORE_SKY_LAKE : return "Core Sky Lake"; case CPU::XEON_PHI_KNIGHTS_MILL : return "Xeon Phi Knights Mill"; case CPU::XEON_PHI_KNIGHTS_LANDING: return "Xeon Phi Knights Landing"; case CPU::XEON_BROADWELL : return "Xeon Broadwell"; case CPU::CORE_BROADWELL : return "Core Broadwell"; case CPU::XEON_HASWELL : return "Xeon Haswell"; case CPU::CORE_HASWELL : return "Core Haswell"; case CPU::XEON_IVY_BRIDGE : return "Xeon Ivy Bridge"; case CPU::CORE_IVY_BRIDGE : return "Core Ivy Bridge"; case CPU::SANDY_BRIDGE : return "Sandy Bridge"; case CPU::NEHALEM : return "Nehalem"; case CPU::CORE2 : return "Core2"; case CPU::CORE1 : return "Core"; case CPU::ARM : return "ARM"; case CPU::UNKNOWN : return "Unknown CPU"; } return "Unknown CPU (error)"; } #if defined(__X86_ASM__) /* constants to access destination registers of CPUID instruction */ static const int EAX = 0; static const int EBX = 1; static const int ECX = 2; static const int EDX = 3; /* cpuid[eax=1].ecx */ static const int CPU_FEATURE_BIT_SSE3 = 1 << 0; static const int CPU_FEATURE_BIT_SSSE3 = 1 << 9; static const int CPU_FEATURE_BIT_FMA3 = 1 << 12; static const int CPU_FEATURE_BIT_SSE4_1 = 1 
<< 19; static const int CPU_FEATURE_BIT_SSE4_2 = 1 << 20; //static const int CPU_FEATURE_BIT_MOVBE = 1 << 22; static const int CPU_FEATURE_BIT_POPCNT = 1 << 23; //static const int CPU_FEATURE_BIT_XSAVE = 1 << 26; static const int CPU_FEATURE_BIT_OXSAVE = 1 << 27; static const int CPU_FEATURE_BIT_AVX = 1 << 28; static const int CPU_FEATURE_BIT_F16C = 1 << 29; static const int CPU_FEATURE_BIT_RDRAND = 1 << 30; /* cpuid[eax=1].edx */ static const int CPU_FEATURE_BIT_SSE = 1 << 25; static const int CPU_FEATURE_BIT_SSE2 = 1 << 26; /* cpuid[eax=0x80000001].ecx */ static const int CPU_FEATURE_BIT_LZCNT = 1 << 5; /* cpuid[eax=7,ecx=0].ebx */ static const int CPU_FEATURE_BIT_BMI1 = 1 << 3; static const int CPU_FEATURE_BIT_AVX2 = 1 << 5; static const int CPU_FEATURE_BIT_BMI2 = 1 << 8; static const int CPU_FEATURE_BIT_AVX512F = 1 << 16; // AVX512F (foundation) static const int CPU_FEATURE_BIT_AVX512DQ = 1 << 17; // AVX512DQ (doubleword and quadword instructions) static const int CPU_FEATURE_BIT_AVX512PF = 1 << 26; // AVX512PF (prefetch gather/scatter instructions) static const int CPU_FEATURE_BIT_AVX512ER = 1 << 27; // AVX512ER (exponential and reciprocal instructions) static const int CPU_FEATURE_BIT_AVX512CD = 1 << 28; // AVX512CD (conflict detection instructions) static const int CPU_FEATURE_BIT_AVX512BW = 1 << 30; // AVX512BW (byte and word instructions) static const int CPU_FEATURE_BIT_AVX512VL = 1 << 31; // AVX512VL (vector length extensions) static const int CPU_FEATURE_BIT_AVX512IFMA = 1 << 21; // AVX512IFMA (integer fused multiple-add instructions) /* cpuid[eax=7,ecx=0].ecx */ static const int CPU_FEATURE_BIT_AVX512VBMI = 1 << 1; // AVX512VBMI (vector bit manipulation instructions) #endif #if defined(__X86_ASM__) __noinline int64_t get_xcr0() { #if defined (__WIN32__) && !defined (__MINGW32__) && defined(_XCR_XFEATURE_ENABLED_MASK) int64_t xcr0 = 0; // int64_t is workaround for compiler bug under VS2013, Win32 xcr0 = _xgetbv(0); return xcr0; #else int xcr0 = 0; 
__asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx" ); return xcr0; #endif } #endif int getCPUFeatures() { #if defined(__X86_ASM__) /* cache CPU features access */ static int cpu_features = 0; if (cpu_features) return cpu_features; /* get number of CPUID leaves */ int cpuid_leaf0[4]; __cpuid(cpuid_leaf0, 0x00000000); unsigned nIds = cpuid_leaf0[EAX]; /* get number of extended CPUID leaves */ int cpuid_leafe[4]; __cpuid(cpuid_leafe, 0x80000000); unsigned nExIds = cpuid_leafe[EAX]; /* get CPUID leaves for EAX = 1,7, and 0x80000001 */ int cpuid_leaf_1[4] = { 0,0,0,0 }; int cpuid_leaf_7[4] = { 0,0,0,0 }; int cpuid_leaf_e1[4] = { 0,0,0,0 }; if (nIds >= 1) __cpuid (cpuid_leaf_1,0x00000001); #if _WIN32 #if _MSC_VER && (_MSC_FULL_VER < 160040219) #else if (nIds >= 7) __cpuidex(cpuid_leaf_7,0x00000007,0); #endif #else if (nIds >= 7) __cpuid_count(cpuid_leaf_7,0x00000007,0); #endif if (nExIds >= 0x80000001) __cpuid(cpuid_leaf_e1,0x80000001); /* detect if OS saves XMM, YMM, and ZMM states */ bool xmm_enabled = true; bool ymm_enabled = false; bool zmm_enabled = false; if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_OXSAVE) { int64_t xcr0 = get_xcr0(); xmm_enabled = ((xcr0 & 0x02) == 0x02); /* checks if xmm are enabled in XCR0 */ ymm_enabled = xmm_enabled && ((xcr0 & 0x04) == 0x04); /* checks if ymm state are enabled in XCR0 */ zmm_enabled = ymm_enabled && ((xcr0 & 0xE0) == 0xE0); /* checks if OPMASK state, upper 256-bit of ZMM0-ZMM15 and ZMM16-ZMM31 state are enabled in XCR0 */ } if (xmm_enabled) cpu_features |= CPU_FEATURE_XMM_ENABLED; if (ymm_enabled) cpu_features |= CPU_FEATURE_YMM_ENABLED; if (zmm_enabled) cpu_features |= CPU_FEATURE_ZMM_ENABLED; if (cpuid_leaf_1[EDX] & CPU_FEATURE_BIT_SSE ) cpu_features |= CPU_FEATURE_SSE; if (cpuid_leaf_1[EDX] & CPU_FEATURE_BIT_SSE2 ) cpu_features |= CPU_FEATURE_SSE2; if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSE3 ) cpu_features |= CPU_FEATURE_SSE3; if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSSE3 ) cpu_features |= CPU_FEATURE_SSSE3; if 
(cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSE4_1) cpu_features |= CPU_FEATURE_SSE41; if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSE4_2) cpu_features |= CPU_FEATURE_SSE42; if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_POPCNT) cpu_features |= CPU_FEATURE_POPCNT; if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_AVX ) cpu_features |= CPU_FEATURE_AVX; if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_F16C ) cpu_features |= CPU_FEATURE_F16C; if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_RDRAND) cpu_features |= CPU_FEATURE_RDRAND; if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX2 ) cpu_features |= CPU_FEATURE_AVX2; if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_FMA3 ) cpu_features |= CPU_FEATURE_FMA3; if (cpuid_leaf_e1[ECX] & CPU_FEATURE_BIT_LZCNT) cpu_features |= CPU_FEATURE_LZCNT; if (cpuid_leaf_7 [EBX] & CPU_FEATURE_BIT_BMI1 ) cpu_features |= CPU_FEATURE_BMI1; if (cpuid_leaf_7 [EBX] & CPU_FEATURE_BIT_BMI2 ) cpu_features |= CPU_FEATURE_BMI2; if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512F ) cpu_features |= CPU_FEATURE_AVX512F; if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512DQ ) cpu_features |= CPU_FEATURE_AVX512DQ; if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512PF ) cpu_features |= CPU_FEATURE_AVX512PF; if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512ER ) cpu_features |= CPU_FEATURE_AVX512ER; if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512CD ) cpu_features |= CPU_FEATURE_AVX512CD; if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512BW ) cpu_features |= CPU_FEATURE_AVX512BW; if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512IFMA) cpu_features |= CPU_FEATURE_AVX512IFMA; if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512VL ) cpu_features |= CPU_FEATURE_AVX512VL; if (cpuid_leaf_7[ECX] & CPU_FEATURE_BIT_AVX512VBMI) cpu_features |= CPU_FEATURE_AVX512VBMI; return cpu_features; #elif defined(__ARM_NEON) int cpu_features = CPU_FEATURE_NEON|CPU_FEATURE_SSE|CPU_FEATURE_SSE2; cpu_features |= CPU_FEATURE_SSE3|CPU_FEATURE_SSSE3|CPU_FEATURE_SSE42; cpu_features |= CPU_FEATURE_XMM_ENABLED; cpu_features |= CPU_FEATURE_YMM_ENABLED; cpu_features |= 
CPU_FEATURE_SSE41 | CPU_FEATURE_RDRAND | CPU_FEATURE_F16C; cpu_features |= CPU_FEATURE_POPCNT; cpu_features |= CPU_FEATURE_AVX; cpu_features |= CPU_FEATURE_AVX2; cpu_features |= CPU_FEATURE_FMA3; cpu_features |= CPU_FEATURE_LZCNT; cpu_features |= CPU_FEATURE_BMI1; cpu_features |= CPU_FEATURE_BMI2; cpu_features |= CPU_FEATURE_NEON_2X; return cpu_features; #else /* Unknown CPU. */ return 0; #endif } std::string stringOfCPUFeatures(int features) { std::string str; if (features & CPU_FEATURE_XMM_ENABLED) str += "XMM "; if (features & CPU_FEATURE_YMM_ENABLED) str += "YMM "; if (features & CPU_FEATURE_ZMM_ENABLED) str += "ZMM "; if (features & CPU_FEATURE_SSE ) str += "SSE "; if (features & CPU_FEATURE_SSE2 ) str += "SSE2 "; if (features & CPU_FEATURE_SSE3 ) str += "SSE3 "; if (features & CPU_FEATURE_SSSE3 ) str += "SSSE3 "; if (features & CPU_FEATURE_SSE41 ) str += "SSE4.1 "; if (features & CPU_FEATURE_SSE42 ) str += "SSE4.2 "; if (features & CPU_FEATURE_POPCNT) str += "POPCNT "; if (features & CPU_FEATURE_AVX ) str += "AVX "; if (features & CPU_FEATURE_F16C ) str += "F16C "; if (features & CPU_FEATURE_RDRAND) str += "RDRAND "; if (features & CPU_FEATURE_AVX2 ) str += "AVX2 "; if (features & CPU_FEATURE_FMA3 ) str += "FMA3 "; if (features & CPU_FEATURE_LZCNT ) str += "LZCNT "; if (features & CPU_FEATURE_BMI1 ) str += "BMI1 "; if (features & CPU_FEATURE_BMI2 ) str += "BMI2 "; if (features & CPU_FEATURE_AVX512F) str += "AVX512F "; if (features & CPU_FEATURE_AVX512DQ) str += "AVX512DQ "; if (features & CPU_FEATURE_AVX512PF) str += "AVX512PF "; if (features & CPU_FEATURE_AVX512ER) str += "AVX512ER "; if (features & CPU_FEATURE_AVX512CD) str += "AVX512CD "; if (features & CPU_FEATURE_AVX512BW) str += "AVX512BW "; if (features & CPU_FEATURE_AVX512VL) str += "AVX512VL "; if (features & CPU_FEATURE_AVX512IFMA) str += "AVX512IFMA "; if (features & CPU_FEATURE_AVX512VBMI) str += "AVX512VBMI "; if (features & CPU_FEATURE_NEON) str += "NEON "; if (features & CPU_FEATURE_NEON_2X) 
str += "2xNEON "; return str; } std::string stringOfISA (int isa) { if (isa == SSE) return "SSE"; if (isa == SSE2) return "SSE2"; if (isa == SSE3) return "SSE3"; if (isa == SSSE3) return "SSSE3"; if (isa == SSE41) return "SSE4.1"; if (isa == SSE42) return "SSE4.2"; if (isa == AVX) return "AVX"; if (isa == AVX2) return "AVX2"; if (isa == AVX512) return "AVX512"; if (isa == NEON) return "NEON"; if (isa == NEON_2X) return "2xNEON"; return "UNKNOWN"; } bool hasISA(int features, int isa) { return (features & isa) == isa; } std::string supportedTargetList (int features) { std::string v; if (hasISA(features,SSE)) v += "SSE "; if (hasISA(features,SSE2)) v += "SSE2 "; if (hasISA(features,SSE3)) v += "SSE3 "; if (hasISA(features,SSSE3)) v += "SSSE3 "; if (hasISA(features,SSE41)) v += "SSE4.1 "; if (hasISA(features,SSE42)) v += "SSE4.2 "; if (hasISA(features,AVX)) v += "AVX "; if (hasISA(features,AVXI)) v += "AVXI "; if (hasISA(features,AVX2)) v += "AVX2 "; if (hasISA(features,AVX512)) v += "AVX512 "; if (hasISA(features,NEON)) v += "NEON "; if (hasISA(features,NEON_2X)) v += "2xNEON "; return v; } } //////////////////////////////////////////////////////////////////////////////// /// Windows Platform //////////////////////////////////////////////////////////////////////////////// #if defined(__WIN32__) #define WIN32_LEAN_AND_MEAN #include #include namespace embree { std::string getExecutableFileName() { char filename[1024]; if (!GetModuleFileName(nullptr, filename, sizeof(filename))) return std::string(); return std::string(filename); } int getTerminalWidth() { HANDLE handle = GetStdHandle(STD_OUTPUT_HANDLE); if (handle == INVALID_HANDLE_VALUE) return 80; CONSOLE_SCREEN_BUFFER_INFO info; memset(&info,0,sizeof(info)); GetConsoleScreenBufferInfo(handle, &info); return info.dwSize.X; } double getSeconds() { LARGE_INTEGER freq, val; QueryPerformanceFrequency(&freq); QueryPerformanceCounter(&val); return (double)val.QuadPart / (double)freq.QuadPart; } void sleepSeconds(double t) { 
Sleep(DWORD(1000.0*t)); } } #endif //////////////////////////////////////////////////////////////////////////////// /// Linux Platform //////////////////////////////////////////////////////////////////////////////// #if defined(__LINUX__) #include #include namespace embree { std::string getExecutableFileName() { std::string pid = "/proc/" + toString(getpid()) + "/exe"; char buf[4096]; memset(buf,0,sizeof(buf)); if (readlink(pid.c_str(), buf, sizeof(buf)-1) == -1) return std::string(); return std::string(buf); } } #endif //////////////////////////////////////////////////////////////////////////////// /// FreeBSD Platform //////////////////////////////////////////////////////////////////////////////// #if defined (__FreeBSD__) #include namespace embree { std::string getExecutableFileName() { const int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 }; char buf[4096]; memset(buf,0,sizeof(buf)); size_t len = sizeof(buf)-1; if (sysctl(mib, 4, buf, &len, 0x0, 0) == -1) return std::string(); return std::string(buf); } } #endif //////////////////////////////////////////////////////////////////////////////// /// Mac OS X Platform //////////////////////////////////////////////////////////////////////////////// #if defined(__MACOSX__) #include namespace embree { std::string getExecutableFileName() { char buf[4096]; uint32_t size = sizeof(buf); if (_NSGetExecutablePath(buf, &size) != 0) return std::string(); return std::string(buf); } } #endif //////////////////////////////////////////////////////////////////////////////// /// Unix Platform //////////////////////////////////////////////////////////////////////////////// #if defined(__UNIX__) #include #include #include #include namespace embree { int getTerminalWidth() { struct winsize info; if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &info) < 0) return 80; return info.ws_col; } double getSeconds() { struct timeval tp; gettimeofday(&tp,nullptr); return double(tp.tv_sec) + double(tp.tv_usec)/1E6; } void sleepSeconds(double t) { 
usleep(1000000.0*t); } } #endif #if defined(__INTEL_LLVM_COMPILER) #pragma clang diagnostic pop #endif level-zero-raytracing-support-1.2.3/rtbuild/sys/sysinfo.h000066400000000000000000000107531514453371700236220ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #define CACHELINE_SIZE 64 #if !defined(PAGE_SIZE) #define PAGE_SIZE 4096 #endif #define PAGE_SIZE_2M (2*1024*1024) #define PAGE_SIZE_4K (4*1024) #include "platform.h" #if !defined(__SSE__) #define __SSE__ #endif #if !defined(__SSE2__) #define __SSE2__ #endif namespace embree { enum class CPU { XEON_ICE_LAKE, CORE_ICE_LAKE, CORE_TIGER_LAKE, CORE_COMET_LAKE, CORE_CANNON_LAKE, CORE_KABY_LAKE, XEON_SKY_LAKE, CORE_SKY_LAKE, XEON_PHI_KNIGHTS_MILL, XEON_PHI_KNIGHTS_LANDING, XEON_BROADWELL, CORE_BROADWELL, XEON_HASWELL, CORE_HASWELL, XEON_IVY_BRIDGE, CORE_IVY_BRIDGE, SANDY_BRIDGE, NEHALEM, CORE2, CORE1, ARM, UNKNOWN, }; /*! get the full path to the running executable */ std::string getExecutableFileName(); /*! return platform name */ std::string getPlatformName(); /*! get the full name of the compiler */ std::string getCompilerName(); /*! return the name of the CPU */ std::string getCPUVendor(); /*! get microprocessor model */ CPU getCPUModel(); /*! converts CPU model into string */ std::string stringOfCPUModel(CPU model); /*! 
CPU features */ static const int CPU_FEATURE_SSE = 1 << 0; static const int CPU_FEATURE_SSE2 = 1 << 1; static const int CPU_FEATURE_SSE3 = 1 << 2; static const int CPU_FEATURE_SSSE3 = 1 << 3; static const int CPU_FEATURE_SSE41 = 1 << 4; static const int CPU_FEATURE_SSE42 = 1 << 5; static const int CPU_FEATURE_POPCNT = 1 << 6; static const int CPU_FEATURE_AVX = 1 << 7; static const int CPU_FEATURE_F16C = 1 << 8; static const int CPU_FEATURE_RDRAND = 1 << 9; static const int CPU_FEATURE_AVX2 = 1 << 10; static const int CPU_FEATURE_FMA3 = 1 << 11; static const int CPU_FEATURE_LZCNT = 1 << 12; static const int CPU_FEATURE_BMI1 = 1 << 13; static const int CPU_FEATURE_BMI2 = 1 << 14; static const int CPU_FEATURE_AVX512F = 1 << 16; static const int CPU_FEATURE_AVX512DQ = 1 << 17; static const int CPU_FEATURE_AVX512PF = 1 << 18; static const int CPU_FEATURE_AVX512ER = 1 << 19; static const int CPU_FEATURE_AVX512CD = 1 << 20; static const int CPU_FEATURE_AVX512BW = 1 << 21; static const int CPU_FEATURE_AVX512VL = 1 << 22; static const int CPU_FEATURE_AVX512IFMA = 1 << 23; static const int CPU_FEATURE_AVX512VBMI = 1 << 24; static const int CPU_FEATURE_XMM_ENABLED = 1 << 25; static const int CPU_FEATURE_YMM_ENABLED = 1 << 26; static const int CPU_FEATURE_ZMM_ENABLED = 1 << 27; static const int CPU_FEATURE_NEON = 1 << 28; static const int CPU_FEATURE_NEON_2X = 1 << 29; /*! get CPU features */ int getCPUFeatures(); /*! convert CPU features into a string */ std::string stringOfCPUFeatures(int features); /*! creates a string of all supported targets that are supported */ std::string supportedTargetList (int isa); /*! 
ISAs */ static const int SSE = CPU_FEATURE_SSE | CPU_FEATURE_XMM_ENABLED; static const int SSE2 = SSE | CPU_FEATURE_SSE2; static const int SSE3 = SSE2 | CPU_FEATURE_SSE3; static const int SSSE3 = SSE3 | CPU_FEATURE_SSSE3; static const int SSE41 = SSSE3 | CPU_FEATURE_SSE41; static const int SSE42 = SSE41 | CPU_FEATURE_SSE42 | CPU_FEATURE_POPCNT; static const int AVX = SSE42 | CPU_FEATURE_AVX | CPU_FEATURE_YMM_ENABLED; static const int AVXI = AVX | CPU_FEATURE_F16C | CPU_FEATURE_RDRAND; static const int AVX2 = AVXI | CPU_FEATURE_AVX2 | CPU_FEATURE_FMA3 | CPU_FEATURE_BMI1 | CPU_FEATURE_BMI2 | CPU_FEATURE_LZCNT; static const int AVX512 = AVX2 | CPU_FEATURE_AVX512F | CPU_FEATURE_AVX512DQ | CPU_FEATURE_AVX512CD | CPU_FEATURE_AVX512BW | CPU_FEATURE_AVX512VL | CPU_FEATURE_ZMM_ENABLED; static const int NEON = CPU_FEATURE_NEON | CPU_FEATURE_SSE | CPU_FEATURE_SSE2; static const int NEON_2X = CPU_FEATURE_NEON_2X | AVX2; /*! converts ISA bitvector into a string */ std::string stringOfISA(int features); /*! return the number of logical threads of the system */ unsigned int getNumberOfLogicalThreads(); /*! returns the size of the terminal window in characters */ int getTerminalWidth(); /*! returns performance counter in seconds */ double getSeconds(); /*! sleeps the specified number of seconds */ void sleepSeconds(double t); /*! returns virtual address space occupied by process */ size_t getVirtualMemoryBytes(); /*! 
returns resident memory required by process */ size_t getResidentMemoryBytes(); } level-zero-raytracing-support-1.2.3/rtbuild/sys/vector.h000066400000000000000000000201031514453371700234200ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "alloc.h" #include namespace embree { class Device; template class vector_t { public: typedef T value_type; typedef T* iterator; typedef const T* const_iterator; __forceinline vector_t () : size_active(0), size_alloced(0), items(nullptr) {} __forceinline explicit vector_t (size_t sz) : size_active(0), size_alloced(0), items(nullptr) { internal_resize_init(sz); } template __forceinline explicit vector_t (M alloc, size_t sz) : alloc(alloc), size_active(0), size_alloced(0), items(nullptr) { internal_resize_init(sz); } __forceinline vector_t (Device* alloc) : vector_t(alloc,0) {} __forceinline vector_t(void* data, size_t bytes) : size_active(0), size_alloced(bytes/sizeof(T)), items((T*)data) {} __forceinline ~vector_t() { clear(); } __forceinline vector_t (const vector_t& other) { size_active = other.size_active; size_alloced = other.size_alloced; items = alloc.allocate(size_alloced); for (size_t i=0; i 0); return items[0]; }; __forceinline T& back () const { assert(size_active > 0); return items[size_active-1]; }; __forceinline T* data() { return items; }; __forceinline const T* data() const { return items; }; /******************** Modifiers **************************/ __forceinline void push_back(const T& nt) { const T v = nt; // need local copy as input reference could point to this vector internal_resize(size_active,internal_grow_size(size_active+1)); ::new (&items[size_active++]) T(v); } __forceinline void pop_back() { assert(!empty()); size_active--; items[size_active].~T(); } __forceinline void clear() { /* destroy elements */ for (size_t i=0; i using vector = vector_t>; /*! 
vector class that performs aligned allocations */ template using avector = vector_t::value> >; /*! vector class that performs OS allocations */ template using ovector = vector_t >; /*! vector class with externally managed data buffer */ template using evector = vector_t>; } level-zero-raytracing-support-1.2.3/rttrace/000077500000000000000000000000001514453371700211325ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/rttrace/CMakeLists.txt000066400000000000000000000014011514453371700236660ustar00rootroot00000000000000## Copyright 2009-2021 Intel Corporation ## SPDX-License-Identifier: Apache-2.0 GET_FILENAME_COMPONENT(SYCL_COMPILER_DIR ${CMAKE_CXX_COMPILER} PATH) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -isystem \"${SYCL_COMPILER_DIR}/../include/sycl\" -isystem \"${SYCL_COMPILER_DIR}/../include/\"") # disable warning from SYCL header (FIXME: why required?) ADD_LIBRARY(embree_rthwif_sycl STATIC rttrace_validation.cpp) SET_PROPERTY(TARGET embree_rthwif_sycl APPEND PROPERTY COMPILE_FLAGS "-fsycl -fsycl-targets=spir64 -DEMBREE_SYCL_SUPPORT") INSTALL(TARGETS embree_rthwif_sycl EXPORT embree_rthwif_sycl-targets ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib) INSTALL(EXPORT embree_rthwif_sycl-targets DESTINATION "${EMBREE_CMAKEEXPORT_DIR}" COMPONENT devel) level-zero-raytracing-support-1.2.3/rttrace/rttrace.h000066400000000000000000000273321514453371700227560ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #if defined(ZE_RAYTRACING_RT_SIMULATION) #include "rtcore.h" #endif #if defined(EMBREE_SYCL_RT_VALIDATION_API) # include "rttrace_validation.h" #else #include #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wdeprecated-declarations" #pragma clang diagnostic ignored "-W#pragma-messages" #include #pragma clang diagnostic pop #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wreturn-type-c-linkage" enum intel_ray_flags_t { intel_ray_flags_none 
= 0x00, intel_ray_flags_force_opaque = 0x01, // forces geometry to be opaque (no anyhit shader invokation) intel_ray_flags_force_non_opaque = 0x02, // forces geometry to be non-opqaue (invoke anyhit shader) intel_ray_flags_accept_first_hit_and_end_search = 0x04, // terminates traversal on the first hit found (shadow rays) intel_ray_flags_skip_closest_hit_shader = 0x08, // skip execution of the closest hit shader intel_ray_flags_cull_back_facing_triangles = 0x10, // back facing triangles to not produce a hit intel_ray_flags_cull_front_facing_triangles = 0x20, // front facing triangles do not produce a hit intel_ray_flags_cull_opaque = 0x40, // opaque geometry does not produce a hit intel_ray_flags_cull_non_opaque = 0x80, // non-opaque geometry does not produce a hit intel_ray_flags_skip_triangles = 0x100, // treat all triangle intersections as misses. intel_ray_flags_skip_procedural_primitives = 0x200, // skip execution of intersection shaders }; enum intel_hit_type_t { intel_hit_type_committed_hit = 0, intel_hit_type_potential_hit = 1, }; enum intel_raytracing_ext_flag_t { intel_raytracing_ext_flag_ray_query = 1 << 0, // true if ray queries are supported }; // opaque types typedef __attribute__((opencl_private)) struct intel_ray_query_opaque_t* intel_ray_query_t; typedef __attribute__((opencl_global )) struct intel_raytracing_acceleration_structure_opaque_t* intel_raytracing_acceleration_structure_t; struct intel_float2 { float x, y; intel_float2() {} intel_float2(float x, float y) : x(x), y(y) {} intel_float2(sycl::float2 v) : x(v.x()), y(v.y()) {} operator sycl::float2() { return sycl::float2(x,y); } }; struct intel_float3 { float x, y, z; intel_float3() {} intel_float3(float x, float y, float z) : x(x), y(y), z(z) {} intel_float3(sycl::float3 v) : x(v.x()), y(v.y()), z(v.z()) {} operator sycl::float3() { return sycl::float3(x,y,z); } }; struct intel_float4x3 { intel_float3 vx, vy, vz, p; }; struct intel_ray_desc_t { intel_float3 origin; intel_float3 direction; 
float tmin; float tmax; unsigned int mask; intel_ray_flags_t flags; }; // if traversal returns one can test if a triangle or procedural is hit enum intel_candidate_type_t { intel_candidate_type_triangle, intel_candidate_type_procedural }; #ifdef __SYCL_DEVICE_ONLY__ // check supported ray tracing features SYCL_EXTERNAL extern "C" intel_raytracing_ext_flag_t intel_get_raytracing_ext_flag(); // initializes a ray query SYCL_EXTERNAL extern "C" intel_ray_query_t intel_ray_query_init( intel_ray_desc_t ray, intel_raytracing_acceleration_structure_t accel ); // setup for instance traversal using a transformed ray and bottom-level AS SYCL_EXTERNAL extern "C" void intel_ray_query_forward_ray( intel_ray_query_t query, intel_ray_desc_t ray, intel_raytracing_acceleration_structure_t accel ); // commit the potential hit SYCL_EXTERNAL extern "C" void intel_ray_query_commit_potential_hit( intel_ray_query_t query ); // commit the potential hit and override hit distance and UVs SYCL_EXTERNAL extern "C" void intel_ray_query_commit_potential_hit_override( intel_ray_query_t query, float override_hit_distance, intel_float2 override_uv ); // start traversal of a ray query SYCL_EXTERNAL extern "C" void intel_ray_query_start_traversal( intel_ray_query_t query ); // synchronize rayquery execution. If a ray was dispatched, // This must be called prior to calling any of the accessors below. SYCL_EXTERNAL extern "C" void intel_ray_query_sync( intel_ray_query_t query ); // signal that a ray query will not be used further. 
This is the moral equaivalent of a delete // this function does an implicit sync SYCL_EXTERNAL extern "C" void intel_ray_query_abandon( intel_ray_query_t query ); // read hit information during shader execution SYCL_EXTERNAL extern "C" unsigned int intel_get_hit_bvh_level( intel_ray_query_t query, intel_hit_type_t hit_type ); SYCL_EXTERNAL extern "C" float intel_get_hit_distance( intel_ray_query_t query, intel_hit_type_t hit_type ); SYCL_EXTERNAL extern "C" intel_float2 intel_get_hit_barycentrics( intel_ray_query_t query, intel_hit_type_t hit_type ); SYCL_EXTERNAL extern "C" bool intel_get_hit_front_face( intel_ray_query_t query, intel_hit_type_t hit_type ); SYCL_EXTERNAL extern "C" unsigned int intel_get_hit_geometry_id(intel_ray_query_t query, intel_hit_type_t hit_type ); SYCL_EXTERNAL extern "C" unsigned int intel_get_hit_primitive_id( intel_ray_query_t query, intel_hit_type_t hit_type ); SYCL_EXTERNAL extern "C" unsigned int intel_get_hit_triangle_primitive_id( intel_ray_query_t query, intel_hit_type_t hit_type ); // fast path for quad leaves SYCL_EXTERNAL extern "C" unsigned int intel_get_hit_procedural_primitive_id( intel_ray_query_t query, intel_hit_type_t hit_type ); // fast path for procedural leaves SYCL_EXTERNAL extern "C" unsigned int intel_get_hit_instance_id( intel_ray_query_t query, intel_hit_type_t hit_type ); SYCL_EXTERNAL extern "C" unsigned int intel_get_hit_instance_user_id( intel_ray_query_t query, intel_hit_type_t hit_type ); SYCL_EXTERNAL extern "C" intel_float4x3 intel_get_hit_world_to_object( intel_ray_query_t query, intel_hit_type_t hit_type ); SYCL_EXTERNAL extern "C" intel_float4x3 intel_get_hit_object_to_world( intel_ray_query_t query, intel_hit_type_t hit_type ); // fetch triangle vertices for a hit SYCL_EXTERNAL extern "C" void intel_get_hit_triangle_vertices( intel_ray_query_t query, intel_float3 vertices_out[3], intel_hit_type_t hit_type ); // Read ray-data. 
This is used to read transformed rays produced by HW instancing pipeline // during any-hit or intersection shader execution. SYCL_EXTERNAL extern "C" intel_float3 intel_get_ray_origin( intel_ray_query_t query, unsigned int bvh_level ); SYCL_EXTERNAL extern "C" intel_float3 intel_get_ray_direction( intel_ray_query_t query, unsigned int bvh_level ); SYCL_EXTERNAL extern "C" float intel_get_ray_tmin( intel_ray_query_t query, unsigned int bvh_level ); SYCL_EXTERNAL extern "C" intel_ray_flags_t intel_get_ray_flags( intel_ray_query_t query, unsigned int bvh_level ); SYCL_EXTERNAL extern "C" unsigned int intel_get_ray_mask( intel_ray_query_t query, unsigned int bvh_level ); SYCL_EXTERNAL extern "C" intel_candidate_type_t intel_get_hit_candidate( intel_ray_query_t query, intel_hit_type_t hit_type ); // test whether traversal has terminated. If false, the ray has reached // a procedural leaf or a non-opaque triangle leaf, and requires shader processing SYCL_EXTERNAL extern "C" bool intel_is_traversal_done( intel_ray_query_t query ); // if traversal is done one can test for the presence of a committed hit to either invoke miss or closest hit shader SYCL_EXTERNAL extern "C" bool intel_has_committed_hit( intel_ray_query_t query ); #else inline intel_raytracing_ext_flag_t intel_get_raytracing_ext_flag() { return intel_raytracing_ext_flag_ray_query; } inline intel_ray_query_t intel_ray_query_init( intel_ray_desc_t ray, intel_raytracing_acceleration_structure_t accel ) { return NULL; } // setup for instance traversal using a transformed ray and bottom-level AS inline void intel_ray_query_forward_ray( intel_ray_query_t query, intel_ray_desc_t ray, intel_raytracing_acceleration_structure_t accel ) {} // commit the potential hit inline void intel_ray_query_commit_potential_hit( intel_ray_query_t query ) {} // commit the potential hit and override hit distance and UVs inline void intel_ray_query_commit_potential_hit_override( intel_ray_query_t query, float override_hit_distance, 
intel_float2 override_uv ) {} // start traversal of a ray query inline void intel_ray_query_start_traversal( intel_ray_query_t query ) {} // synchronize rayquery execution. If a ray was dispatched, // This must be called prior to calling any of the accessors below. inline void intel_ray_query_sync( intel_ray_query_t query ) {} // signal that a ray query will not be used further. This is the moral equaivalent of a delete // this function does an implicit sync inline void intel_ray_query_abandon( intel_ray_query_t query ) {} // read hit information during shader execution inline unsigned int intel_get_hit_bvh_level( intel_ray_query_t query, intel_hit_type_t hit_type ) { return 0; } inline float intel_get_hit_distance( intel_ray_query_t query, intel_hit_type_t hit_type ) { return 0.0f; } inline intel_float2 intel_get_hit_barycentrics( intel_ray_query_t query, intel_hit_type_t hit_type ) { return { 0,0 }; } inline bool intel_get_hit_front_face( intel_ray_query_t query, intel_hit_type_t hit_type ) { return false; } inline unsigned int intel_get_hit_geometry_id(intel_ray_query_t query, intel_hit_type_t hit_type ) { return 0; } inline unsigned int intel_get_hit_primitive_id( intel_ray_query_t query, intel_hit_type_t hit_type ) { return 0; } inline unsigned int intel_get_hit_triangle_primitive_id( intel_ray_query_t query, intel_hit_type_t hit_type ) { return 0; } // fast path for quad leaves inline unsigned int intel_get_hit_procedural_primitive_id( intel_ray_query_t query, intel_hit_type_t hit_type ) { return 0; } // fast path for procedural leaves inline unsigned int intel_get_hit_instance_id( intel_ray_query_t query, intel_hit_type_t hit_type ) { return 0; } inline unsigned int intel_get_hit_instance_user_id( intel_ray_query_t query, intel_hit_type_t hit_type ) { return 0; } inline intel_float4x3 intel_get_hit_world_to_object( intel_ray_query_t query, intel_hit_type_t hit_type ) { return { {0,0,0}, {0,0,0}, {0,0,0}, {0,0,0} }; } inline intel_float4x3 
intel_get_hit_object_to_world( intel_ray_query_t query, intel_hit_type_t hit_type ) { return { {0,0,0}, {0,0,0}, {0,0,0}, {0,0,0} }; } // fetch triangle vertices for a hit inline void intel_get_hit_triangle_vertices( intel_ray_query_t query, intel_float3 vertices_out[3], intel_hit_type_t hit_type ) {} // Read ray-data. This is used to read transformed rays produced by HW instancing pipeline // during any-hit or intersection shader execution. inline intel_float3 intel_get_ray_origin( intel_ray_query_t query, unsigned int bvh_level ) { return { 0,0,0 }; } inline intel_float3 intel_get_ray_direction( intel_ray_query_t query, unsigned int bvh_level ) { return { 0,0,0 }; } inline float intel_get_ray_tmin( intel_ray_query_t query, unsigned int bvh_level ) { return 0.0f; } inline intel_ray_flags_t intel_get_ray_flags( intel_ray_query_t query, unsigned int bvh_level ) { return intel_ray_flags_none; } inline unsigned int intel_get_ray_mask( intel_ray_query_t query, unsigned int bvh_level ) { return 0; } inline intel_candidate_type_t intel_get_hit_candidate( intel_ray_query_t query, intel_hit_type_t hit_type ) { return intel_candidate_type_triangle; } // test whether traversal has terminated. 
If false, the ray has reached // a procedural leaf or a non-opaque triangle leaf, and requires shader processing inline bool intel_is_traversal_done( intel_ray_query_t query ) { return false; } // if traversal is done one can test for the presence of a committed hit to either invoke miss or closest hit shader inline bool intel_has_committed_hit( intel_ray_query_t query ) { return false; } #endif #pragma clang diagnostic pop #endif level-zero-raytracing-support-1.2.3/rttrace/rttrace_internal.h000066400000000000000000000425051514453371700246510ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #if ZE_RAYTRACING_DEVICE == 3 # define MemRay MemRayV2 # define MemHit MemHitV2 # define QuadLeaf QuadLeafV2 # define InstanceLeaf InstanceLeafV2 #else # define MemRay MemRayV1 # define MemHit MemHitV1 # define QuadLeaf QuadLeafV1 # define InstanceLeaf InstanceLeafV1 #endif #include enum TraceRayCtrl { TRACE_RAY_INITIAL = 0, // Initializes hit and initializes traversal state TRACE_RAY_INSTANCE = 1, // Loads committed hit and initializes traversal state TRACE_RAY_COMMIT = 2, // Loads potential hit and loads traversal state TRACE_RAY_CONTINUE = 3, // Loads committed hit and loads traversal state TRACE_RAY_DONE = 256, // for internal use only }; typedef __attribute__((opencl_global)) struct rtglobals_opaque_t* rtglobals_t; typedef __attribute__((opencl_private)) struct rtfence_opaque_t* rtfence_t; #if defined(__SYCL_DEVICE_ONLY__) || defined(EMBREE_SYCL_RT_SIMULATION) SYCL_EXTERNAL extern "C" __attribute__((opencl_global)) void* intel_get_implicit_dispatch_globals(); SYCL_EXTERNAL extern "C" void* intel_get_rt_stack(rtglobals_t rt_dispatch_globals); SYCL_EXTERNAL extern "C" void* intel_get_thread_btd_stack(rtglobals_t rt_dispatch_globals); SYCL_EXTERNAL extern "C" void* intel_get_global_btd_stack(rtglobals_t rt_dispatch_globals); SYCL_EXTERNAL extern "C" rtfence_t intel_dispatch_trace_ray_query(rtglobals_t 
rt_dispatch_globals, unsigned int bvh_level, unsigned int traceRayCtrl); SYCL_EXTERNAL extern "C" void intel_rt_sync(rtfence_t fence); #else inline void* intel_get_implicit_dispatch_globals() { return nullptr; } inline void* intel_get_rt_stack(rtglobals_t rt_dispatch_globals) { return nullptr; } inline void* intel_get_thread_btd_stack(rtglobals_t rt_dispatch_globals) { return nullptr; } inline void* intel_get_global_btd_stack(rtglobals_t rt_dispatch_globals) { return nullptr; } inline rtfence_t intel_dispatch_trace_ray_query(rtglobals_t rt_dispatch_globals, unsigned int bvh_level, unsigned int traceRayCtrl) { return nullptr; } inline void intel_rt_sync(rtfence_t fence) {} #endif enum NodeType { NODE_TYPE_MIXED = 0x0, // identifies a mixed internal node where each child can have a different type NODE_TYPE_INTERNAL = 0x0, // internal BVH node with 6 children NODE_TYPE_INSTANCE = 0x1, // instance leaf NODE_TYPE_PROCEDURAL = 0x3, // procedural leaf NODE_TYPE_QUAD = 0x4, // quad leaf NODE_TYPE_INVALID = 0x7 // indicates invalid node }; struct __attribute__ ((packed,aligned(32))) MemRayV1 { void init(intel_ray_desc_t ray, uint64_t rootNodePtr_i) { org[0] = ray.origin.x; org[1] = ray.origin.y; org[2] = ray.origin.z; dir[0] = ray.direction.x; dir[1] = ray.direction.y; dir[2] = ray.direction.z; tnear = ray.tmin; tfar = ray.tmax; rootNodePtr = rootNodePtr_i; rayFlags = ray.flags; hitGroupSRBasePtr = 0; hitGroupSRStride = 0; missSRPtr = 0; pad0 = 0; shaderIndexMultiplier = 0; instLeafPtr = 0; rayMask = ray.mask; pad1 = 0; } // 32 B float org[3]; float dir[3]; float tnear; float tfar; // 32 B struct { // FIXME: removing these anonymous structs triggers IGC bug uint64_t rootNodePtr : 48; // root node to start traversal at uint64_t rayFlags : 16; // ray flags (see RayFlag structure) }; struct { uint64_t hitGroupSRBasePtr : 48; // base of hit group shader record array (16-bytes alignment) uint64_t hitGroupSRStride : 16; // stride of hit group shader record array (16-bytes 
alignment) }; struct { uint64_t missSRPtr : 48; // pointer to miss shader record to invoke on a miss (8-bytes alignment) uint64_t pad0 : 8; // padding byte (has to be zero) uint64_t shaderIndexMultiplier : 8; // shader index multiplier }; struct { uint64_t instLeafPtr : 48; // the pointer to instance leaf in case we traverse an instance (64-bytes alignment) uint64_t rayMask : 8; // ray mask used for ray masking uint64_t pad1 : 8; // padding byte (has to be zero) }; }; struct __attribute__ ((packed,aligned(32))) MemRayV2 { void init(intel_ray_desc_t ray, uint64_t rootNodePtr_i) { org[0] = ray.origin.x; org[1] = ray.origin.y; org[2] = ray.origin.z; dir[0] = ray.direction.x; dir[1] = ray.direction.y; dir[2] = ray.direction.z; tnear = ray.tmin; tfar = ray.tmax; rootNodePtr = rootNodePtr_i; instLeafPtr = 0; rayFlags = ray.flags; rayMask = ray.mask; ComparisonValue = 0; pad1 = 0; hitGroupIndex = 0; missShaderIndex = 0; shaderIndexMultiplier = 0; pad2 = 0; internalRayFlags = 0; time = 0.0f; } // 32 B float org[3]; float dir[3]; float tnear; float tfar; uint64_t rootNodePtr; // root node to start traversal at uint64_t instLeafPtr; // the pointer to instance leaf in case we traverse an instance (64-bytes alignment) uint32_t rayFlags : 16; // ray flags (see RayFlag structure) uint32_t rayMask : 8; // ray mask used for ray masking uint32_t ComparisonValue : 7; // to be compared with Instance.ComparisonMask uint32_t pad1 : 1; uint32_t hitGroupIndex; // hit group shader index uint32_t missShaderIndex : 16; // index of miss shader to invoke on a miss uint32_t shaderIndexMultiplier : 4; // shader index multiplier uint32_t pad2 : 4; uint32_t internalRayFlags : 8; // Xe3: internal ray flags (see internal section of RayFlag structure) float time; // ray time in range [0,1] }; struct __attribute__ ((packed,aligned(32))) MemHitV1 { inline float getT() const { return ft; } inline void setT(float t) { ft = t; } inline float getU() const { return fu; } inline void setU(float u) { fu = u; 
} inline float getV() const { return fv; } inline void setV(float v) { fv = v; } inline void* getPrimLeafPtr() { return sycl::global_ptr((void*)(uint64_t(primLeafPtr)*64)).get(); } inline void* getInstanceLeafPtr() { return sycl::global_ptr((void*)(uint64_t(instLeafPtr)*64)).get(); } public: float ft; // hit distance of current hit (or initial traversal distance) float fu,fv; // barycentric hit coordinates union { struct { uint32_t primIndexDelta : 16; // prim index delta for compressed meshlets and quads uint32_t valid : 1; // set if there is a hit uint32_t leafType : 3; // type of node primLeafPtr is pointing to uint32_t primLeafIndex : 4; // index of the hit primitive inside the leaf uint32_t bvhLevel : 3; // the instancing level at which the hit occured uint32_t frontFace : 1; // whether we hit the front-facing side of a triangle (also used to pass opaque flag when calling intersection shaders) uint32_t done : 1; // used in sync mode to indicate that traversal is done uint32_t pad0 : 3; // unused bits }; uint32_t data; }; struct { // FIXME: removing these anonymous structs triggers IGC bug int64_t primLeafPtr : 42; // pointer to BVH leaf node (multiple of 64 bytes) uint64_t hitGroupRecPtr0 : 22; // LSB of hit group record of the hit triangle (multiple of 16 bytes) }; struct { int64_t instLeafPtr : 42; // pointer to BVH instance leaf node (in multiple of 64 bytes) uint64_t hitGroupRecPtr1 : 22; // MSB of hit group record of the hit triangle (multiple of 16 bytes) }; void clear(bool _done, bool _valid) { //*(sycl::int8*) this = sycl::int8(0x7F800000 /* INFINITY */, 0, 0, (_done ? 0x10000000 : 0) | (_valid ? 0x10000), 0, 0, 0, 0); ft = fu = fv = 0.0f; data = 0; done = _done ? 1 : 0; valid = _valid ? 
1 : 0; } }; struct __attribute__ ((packed,aligned(32))) MemHitV2 { inline float getT() const { return ft; } inline void setT(float t) { ft = t; } inline float getU() const { return float(iu) * (1.0f/0xFFFFFF); } inline void setU(float u) { iu = (uint32_t) sycl::round(std::min(std::max(u,0.0f),1.0f)*0xFFFFFF); } inline float getV() const { return float(iv) * (1.0f/0xFFFFFF); } inline void setV(float v) { iv = (uint32_t) sycl::round(std::min(std::max(v,0.0f),1.0f)*0xFFFFFF); } inline void* getPrimLeafPtr() { return sycl::global_ptr((void*)(uint64_t(primLeafPtr)*64)).get(); } inline void* getInstanceLeafPtr() { return sycl::global_ptr((void*)(uint64_t(instLeafPtr)*64)).get(); } public: float ft; // hit distance of current hit (or initial traversal distance) uint32_t iu : 24; // barycentric u hit coordinate stored as unorm24 (scaled by 0xFFFFFF) uint32_t hitGroupIndex0 : 8; // 1st bits of hitGroupIndex uint32_t iv : 24; // barycentric v hit coordinate stored as unorm24 (scaled by 0xFFFFFF) uint32_t hitGroupIndex1 : 8; // 2nd bits of hitGroupIndex union { struct { uint32_t primIndexDelta : 5; // prim index delta for second triangle of quad leaf uint32_t pad1 : 7; // unused bits (MBZ) uint32_t leafNodeSubType : 4; // sub-type of node primLeafPtr is pointing to uint32_t valid : 1; // set if there is a hit uint32_t leafType : 3; // type of node primLeafPtr is pointing to uint32_t primLeafIndex : 4; // index of the hit primitive inside the leaf uint32_t bvhLevel : 3; // the instancing level at which the hit occured uint32_t frontFace : 1; // whether we hit the front-facing side of a triangle (also used to pass opaque flag when calling intersection shaders) uint32_t done : 1; // used in sync mode to indicate that traversal is done uint32_t needSWSTOC : 1; // If set, any-hit shader must perform a SW fallback for STOC test uint32_t pad0 : 2; // unused bits (MBZ) }; uint32_t data; }; uint64_t hitGroupIndex2 : 6; // 3rd bits of hitGroupIndex int64_t primLeafPtr : 58; // pointer 
to BVH leaf node (MSB of 64b pointer aligned to 64b) uint64_t hitGroupIndex3 : 6; // 4th bits of hit group index int64_t instLeafPtr : 58; // pointer to BVH instance leaf node (MSB of 64b pointer aligned to 64b) void clear(bool _done, bool _valid) { //*(sycl::int8*) this = sycl::int8(0x7F800000 /* INFINITY */, 0, 0, (_done ? 0x10000000 : 0) | (_valid ? 0x10000), 0, 0, 0, 0); ft = 0.0f; iu = 0; hitGroupIndex0 = 0; iv = 0; hitGroupIndex1 = 0; data = 0; done = _done ? 1 : 0; valid = _valid ? 1 : 0; } }; struct __attribute__ ((packed,aligned(64))) RTStack { union { struct { struct MemHit committedHit; // stores committed hit struct MemHit potentialHit; // stores potential hit that is passed to any hit shader }; struct MemHit hit[2]; // committedHit, potentialHit }; struct MemRay ray[2]; char travStack[32*2]; }; struct __attribute__ ((packed)) HWAccel { uint64_t reserved; float bounds[2][3]; // bounding box of the BVH uint32_t reserved0[8]; uint32_t numTimeSegments; uint32_t reserved1[13]; uint64_t dispatchGlobalsPtr; }; struct __attribute__ ((packed,aligned(8))) PrimLeafDesc { struct { uint32_t shaderIndex : 24; // shader index used for shader record calculations uint32_t geomMask : 8; // geometry mask used for ray masking }; struct { uint32_t geomIndex : 29; // the geometry index specifies the n'th geometry of the scene uint32_t type : 1; // enable/disable culling for procedurals and instances uint32_t geomFlags : 2; // geometry flags of this geometry }; }; struct __attribute__ ((packed,aligned(64))) QuadLeafV1 { struct PrimLeafDesc leafDesc; unsigned int primIndex0; struct { uint32_t primIndex1Delta : 16; // delta encoded primitive index of second triangle uint32_t j0 : 2; // specifies first vertex of second triangle uint32_t j1 : 2; // specified second vertex of second triangle uint32_t j2 : 2; // specified third vertex of second triangle uint32_t last : 1; // true if the second triangle is the last triangle in a leaf list uint32_t pad : 9; // unused bits }; float 
v[4][3]; }; struct __attribute__ ((packed,aligned(64))) QuadLeafV2 { struct PrimLeafDesc leafDesc; unsigned int primIndex0; struct { uint32_t primIndex1Delta : 5; // delta encoded primitive index of second triangle uint32_t pad1 : 11; // not needed here uint32_t j0 : 2; // specifies first vertex of second triangle uint32_t j1 : 2; // specified second vertex of second triangle uint32_t j2 : 2; // specified third vertex of second triangle uint32_t last : 1; // true if the second triangle is the last triangle in a leaf list uint32_t pad : 9; // unused bits }; float v[4][3]; }; struct __attribute__ ((packed,aligned(64))) ProceduralLeaf { static const constexpr uint32_t N = 13; struct PrimLeafDesc leafDesc; // leaf header identifying the geometry struct { uint32_t numPrimitives : 4; // number of stored primitives uint32_t pad : 32-4-N; uint32_t last : N; // bit vector with a last bit per primitive }; uint32_t _primIndex[N]; // primitive indices of all primitives stored inside the leaf }; struct __attribute__ ((packed,aligned(64))) InstanceLeafV1 { /* first 64 bytes accessed during traversal by hardware */ struct Part0 { public: struct { uint32_t shaderIndex : 24; // shader index used to calculate instancing shader in case of software instancing uint32_t geomMask : 8; // geometry mask used for ray masking }; struct { uint32_t instanceContributionToHitGroupIndex : 24; uint32_t pad0 : 5; /* the following two entries are only used for procedural instances */ uint32_t type : 1; // enables/disables opaque culling uint32_t geomFlags : 2; // unused for instances }; struct { uint64_t startNodePtr : 48; // start node where to continue traversal of the instanced object uint64_t instFlags : 8; // flags for the instance (see InstanceFlags) uint64_t pad1 : 8; // unused bits }; float world2obj_vx[3]; // 1st column of Worl2Obj transform float world2obj_vy[3]; // 2nd column of Worl2Obj transform float world2obj_vz[3]; // 3rd column of Worl2Obj transform float obj2world_p[3]; // 
translation of Obj2World transform (on purpose in first 64 bytes) } part0; /* second 64 bytes accessed during shading */ struct Part1 { struct { uint64_t bvhPtr : 48; // pointer to BVH where start node belongs too uint64_t pad : 16; // unused bits }; uint32_t instanceID; // user defined value per DXR spec uint32_t instanceIndex; // geometry index of the instance (n'th geometry in scene) float obj2world_vx[3]; // 1st column of Obj2World transform float obj2world_vy[3]; // 2nd column of Obj2World transform float obj2world_vz[3]; // 3rd column of Obj2World transform float world2obj_p[3]; // translation of World2Obj transform } part1; }; struct __attribute__ ((packed,aligned(64))) InstanceLeafV2 { /* first 64 bytes accessed during traversal by hardware */ struct Part0 { public: uint32_t instanceContributionToHitGroupIndex : 24; // Xe3: instance contribution to hit group index uint32_t geomMask : 8; // Xe1+: geometry mask used for ray masking uint32_t instFlags : 8; // Xe3: flags for the instance (see InstanceFlags) uint32_t ComparisonMode : 1; // Xe3: 0 for <=, 1 for > comparison uint32_t ComparisonValue : 7; // Xe3: to be compared with ray.ComparionMask uint32_t pad0 : 8; // reserved (MBZ) uint32_t subType : 3; // Xe3: geometry sub-type uint32_t pad1 : 2; // reserved (MBZ) uint32_t DisableOpacityCull : 1; // Xe1+: disables opacity culling uint32_t OpaqueGeometry : 1; // Xe1+: determines if geometry is opaque uint32_t IgnoreRayMultiplier : 1; // Xe3: ignores ray geometry multiplier uint64_t startNodePtr; // Xe3: 64 bit start node where of the instanced object float world2obj_vx[3]; // 1st column of Worl2Obj transform float world2obj_vy[3]; // 2nd column of Worl2Obj transform float world2obj_vz[3]; // 3rd column of Worl2Obj transform float obj2world_p[3]; // translation of Obj2World transform (on purpose in first 64 bytes) } part0; /* second 64 bytes accessed during shading */ struct Part1 { struct { uint64_t bvhPtr : 48; // pointer to BVH where start node belongs too 
uint64_t pad : 16; // unused bits }; uint32_t instanceID; // user defined value per DXR spec uint32_t instanceIndex; // geometry index of the instance (n'th geometry in scene) float obj2world_vx[3]; // 1st column of Obj2World transform float obj2world_vy[3]; // 2nd column of Obj2World transform float obj2world_vz[3]; // 3rd column of Obj2World transform float world2obj_p[3]; // translation of World2Obj transform } part1; }; level-zero-raytracing-support-1.2.3/rttrace/rttrace_validation.cpp000066400000000000000000000261001514453371700255130ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "rttrace_validation.h" #include #define sizeof_QBVH6_InternalNode6 64 #define QBVH6_rootNodeOffset 128 /*struct rayquery_impl_t { rtfence_t fence; rtglobals_t dispatchGlobalsPtr; struct RTStack* rtStack; TraceRayCtrl ctrl; unsigned int bvh_level; };*/ void use_rthwif_production() { } SYCL_EXTERNAL intel_raytracing_ext_flag_t intel_get_raytracing_ext_flag() { return intel_raytracing_ext_flag_ray_query; } SYCL_EXTERNAL intel_ray_query_t intel_ray_query_init(intel_ray_desc_t ray, intel_raytracing_acceleration_structure_t accel_i ) { unsigned int bvh_level = 0; //intel_raytracing_acceleration_structure_t* accel_i = sycl::global_ptr(_accel_i).get(); HWAccel* accel = (HWAccel*)accel_i; #if defined(EMBREE_SYCL_ALLOC_DISPATCH_GLOBALS) rtglobals_t dispatchGlobalsPtr = (rtglobals_t) accel->dispatchGlobalsPtr; #else rtglobals_t dispatchGlobalsPtr = (rtglobals_t) intel_get_implicit_dispatch_globals(); #endif struct RTStack* __restrict rtStack = sycl::global_ptr((struct RTStack*)intel_get_rt_stack( (rtglobals_t)dispatchGlobalsPtr )).get(); /* init ray */ rtStack->ray[bvh_level].init(ray,(uint64_t)accel + QBVH6_rootNodeOffset); rtStack->committedHit.setT(INFINITY); rtStack->committedHit.setU(0.0f); rtStack->committedHit.setV(0.0f); rtStack->committedHit.data = 0; rtStack->potentialHit.setT(INFINITY); 
rtStack->potentialHit.setU(0.0f); rtStack->potentialHit.setV(0.0f); rtStack->potentialHit.data = 0; rtStack->potentialHit.done = 1; rtStack->potentialHit.valid = 1; return { nullptr, (void*) dispatchGlobalsPtr, rtStack, TRACE_RAY_INITIAL, bvh_level }; } SYCL_EXTERNAL void intel_ray_query_forward_ray( intel_ray_query_t& query, intel_ray_desc_t ray, intel_raytracing_acceleration_structure_t accel_i) { HWAccel* accel = (HWAccel*)accel_i; struct RTStack* __restrict rtStack = sycl::global_ptr((struct RTStack*)query.opaque2).get(); /* init ray */ unsigned int bvh_level = query.bvh_level+1; rtStack->ray[bvh_level].init(ray,(uint64_t)accel + QBVH6_rootNodeOffset); query = { nullptr, query.opaque1, query.opaque2, TRACE_RAY_INSTANCE, bvh_level }; } SYCL_EXTERNAL void intel_ray_query_commit_potential_hit( intel_ray_query_t& query ) { struct RTStack* __restrict rtStack = sycl::global_ptr((struct RTStack*)query.opaque2).get(); unsigned int bvh_level = query.bvh_level; unsigned int rflags = rtStack->ray[bvh_level].rayFlags; if (rflags & intel_ray_flags_accept_first_hit_and_end_search) { rtStack->committedHit = rtStack->potentialHit; rtStack->committedHit.valid = 1; query = { nullptr, query.opaque1, query.opaque2, TRACE_RAY_DONE, bvh_level }; } else { rtStack->potentialHit.valid = 1; // FIXME: is this required? 
query = { nullptr, query.opaque1, query.opaque2, TRACE_RAY_COMMIT, bvh_level }; } } SYCL_EXTERNAL void intel_ray_query_commit_potential_hit_override( intel_ray_query_t& query, float override_hit_distance, intel_float2 override_uv ) { //struct RTStack* rtStack = (struct RTStack*) query.opaque2; struct RTStack* __restrict rtStack = sycl::global_ptr((struct RTStack*)query.opaque2).get(); rtStack->potentialHit.setT(override_hit_distance); rtStack->potentialHit.setU(override_uv.x); rtStack->potentialHit.setV(override_uv.y); intel_ray_query_commit_potential_hit(query); } SYCL_EXTERNAL void intel_ray_query_start_traversal( intel_ray_query_t& query ) { rtglobals_t dispatchGlobalsPtr = (rtglobals_t) query.opaque1; struct RTStack* __restrict rtStack = sycl::global_ptr((struct RTStack*)query.opaque2).get(); rtStack->potentialHit.done = 1; rtStack->potentialHit.valid = 1; if (query.ctrl == TRACE_RAY_DONE) return; rtfence_t fence = intel_dispatch_trace_ray_query(dispatchGlobalsPtr,query.bvh_level,query.ctrl); query = { (void*) fence, query.opaque1, query.opaque2, TRACE_RAY_INITIAL, 0 }; } SYCL_EXTERNAL void intel_ray_query_sync( intel_ray_query_t& query ) { intel_rt_sync((rtfence_t)query.opaque0); /* continue is default behaviour */ struct RTStack* __restrict rtStack = sycl::global_ptr((struct RTStack*)query.opaque2).get(); unsigned int bvh_level = rtStack->potentialHit.bvhLevel; query = { query.opaque0, query.opaque1, query.opaque2, TRACE_RAY_CONTINUE, bvh_level }; } SYCL_EXTERNAL void intel_sync_ray_query( intel_ray_query_t& query ) { intel_rt_sync((rtfence_t)query.opaque0); /* continue is default behaviour */ struct RTStack* __restrict rtStack = sycl::global_ptr((struct RTStack*)query.opaque2).get(); unsigned int bvh_level = rtStack->potentialHit.bvhLevel; query = { query.opaque0, query.opaque1, query.opaque2, TRACE_RAY_CONTINUE, bvh_level }; } SYCL_EXTERNAL void intel_ray_query_abandon( intel_ray_query_t& query ) { intel_ray_query_sync(query); query = { nullptr, nullptr, 
nullptr, TRACE_RAY_INITIAL, 0 }; } SYCL_EXTERNAL unsigned int intel_get_hit_bvh_level( intel_ray_query_t& query, intel_hit_type_t hit_type ) { return query.hit(hit_type).bvhLevel; } SYCL_EXTERNAL float intel_get_hit_distance( intel_ray_query_t& query, intel_hit_type_t hit_type ) { return query.hit(hit_type).getT(); } SYCL_EXTERNAL intel_float2 intel_get_hit_barycentrics( intel_ray_query_t& query, intel_hit_type_t hit_type ) { return { query.hit(hit_type).getU(), query.hit(hit_type).getV() }; } SYCL_EXTERNAL bool intel_get_hit_front_face( intel_ray_query_t& query, intel_hit_type_t hit_type ) { return query.hit(hit_type).frontFace; } SYCL_EXTERNAL unsigned int intel_get_hit_geometry_id(intel_ray_query_t& query, intel_hit_type_t hit_type ) { struct PrimLeafDesc* __restrict leaf = (struct PrimLeafDesc*)query.hit(hit_type).getPrimLeafPtr(); return leaf->geomIndex; } SYCL_EXTERNAL unsigned int intel_get_hit_primitive_id( intel_ray_query_t& query, intel_hit_type_t hit_type ) { MemHit& hit = query.hit(hit_type); void* __restrict leaf = hit.getPrimLeafPtr(); if (hit.leafType == NODE_TYPE_QUAD) return ((QuadLeaf*)leaf)->primIndex0 + hit.primIndexDelta; else return ((ProceduralLeaf*)leaf)->_primIndex[hit.primLeafIndex]; } SYCL_EXTERNAL unsigned int intel_get_hit_triangle_primitive_id( intel_ray_query_t& query, intel_hit_type_t hit_type ) { MemHit& hit = query.hit(hit_type); QuadLeaf* __restrict leaf = (QuadLeaf*) hit.getPrimLeafPtr(); return leaf->primIndex0 + hit.primIndexDelta; } SYCL_EXTERNAL unsigned int intel_get_hit_procedural_primitive_id( intel_ray_query_t& query, intel_hit_type_t hit_type ) { MemHit& hit = query.hit(hit_type); ProceduralLeaf* __restrict leaf = (ProceduralLeaf*) hit.getPrimLeafPtr(); return leaf->_primIndex[hit.primLeafIndex]; } SYCL_EXTERNAL unsigned int intel_get_hit_instance_id( intel_ray_query_t& query, intel_hit_type_t hit_type ) { MemHit& hit = query.hit(hit_type); InstanceLeaf* __restrict leaf = (InstanceLeaf*) hit.getInstanceLeafPtr(); if 
(leaf == nullptr) return -1; return leaf->part1.instanceIndex; } SYCL_EXTERNAL unsigned int intel_get_hit_instance_user_id( intel_ray_query_t& query, intel_hit_type_t hit_type ) { MemHit& hit = query.hit(hit_type); InstanceLeaf* __restrict leaf = (InstanceLeaf*) hit.getInstanceLeafPtr(); if (leaf == nullptr) return -1; return leaf->part1.instanceID; } SYCL_EXTERNAL intel_float4x3 intel_get_hit_world_to_object( intel_ray_query_t& query, intel_hit_type_t hit_type ) { MemHit& hit = query.hit(hit_type); InstanceLeaf* __restrict leaf = (InstanceLeaf*) hit.getInstanceLeafPtr(); if (leaf == nullptr) return { { 1,0,0 }, { 0,1,0 }, { 0,0,1 }, { 0,0,0 } }; return { { leaf->part0.world2obj_vx[0], leaf->part0.world2obj_vx[1], leaf->part0.world2obj_vx[2] }, { leaf->part0.world2obj_vy[0], leaf->part0.world2obj_vy[1], leaf->part0.world2obj_vy[2] }, { leaf->part0.world2obj_vz[0], leaf->part0.world2obj_vz[1], leaf->part0.world2obj_vz[2] }, { leaf->part1.world2obj_p [0], leaf->part1.world2obj_p [1], leaf->part1.world2obj_p [2] } }; } SYCL_EXTERNAL intel_float4x3 intel_get_hit_object_to_world( intel_ray_query_t& query, intel_hit_type_t hit_type ) { MemHit& hit = query.hit(hit_type); InstanceLeaf* __restrict leaf = (InstanceLeaf*) hit.getInstanceLeafPtr(); if (leaf == nullptr) return { { 1,0,0 }, { 0,1,0 }, { 0,0,1 }, { 0,0,0 } }; return { { leaf->part1.obj2world_vx[0], leaf->part1.obj2world_vx[1], leaf->part1.obj2world_vx[2] }, { leaf->part1.obj2world_vy[0], leaf->part1.obj2world_vy[1], leaf->part1.obj2world_vy[2] }, { leaf->part1.obj2world_vz[0], leaf->part1.obj2world_vz[1], leaf->part1.obj2world_vz[2] }, { leaf->part0.obj2world_p [0], leaf->part0.obj2world_p [1], leaf->part0.obj2world_p [2] } }; } SYCL_EXTERNAL void intel_get_hit_triangle_vertices( intel_ray_query_t& query, intel_float3 verts_out[3], intel_hit_type_t hit_type ) { const QuadLeaf* __restrict leaf = (const QuadLeaf*) query.hit(hit_type).getPrimLeafPtr(); unsigned int j0 = 0, j1 = 1, j2 = 2; if 
(query.hit(hit_type).primLeafIndex != 0) { j0 = leaf->j0; j1 = leaf->j1; j2 = leaf->j2; } verts_out[0] = { leaf->v[j0][0], leaf->v[j0][1], leaf->v[j0][2] }; verts_out[1] = { leaf->v[j1][0], leaf->v[j1][1], leaf->v[j1][2] }; verts_out[2] = { leaf->v[j2][0], leaf->v[j2][1], leaf->v[j2][2] }; } SYCL_EXTERNAL intel_float3 intel_get_ray_origin( intel_ray_query_t& query, unsigned int bvh_level) { struct RTStack* __restrict rtStack = sycl::global_ptr((struct RTStack*)query.opaque2).get(); MemRay& ray = rtStack->ray[bvh_level]; return { ray.org[0], ray.org[1], ray.org[2] }; } SYCL_EXTERNAL intel_float3 intel_get_ray_direction( intel_ray_query_t& query, unsigned int bvh_level) { struct RTStack* __restrict rtStack = sycl::global_ptr((struct RTStack*)query.opaque2).get(); MemRay& ray = rtStack->ray[bvh_level]; return { ray.dir[0], ray.dir[1], ray.dir[2] }; } SYCL_EXTERNAL float intel_get_ray_tmin( intel_ray_query_t& query, unsigned int bvh_level) { struct RTStack* __restrict rtStack = sycl::global_ptr((struct RTStack*)query.opaque2).get(); return rtStack->ray[bvh_level].tnear; } SYCL_EXTERNAL intel_ray_flags_t intel_get_ray_flags( intel_ray_query_t& query, unsigned int bvh_level) { struct RTStack* __restrict rtStack = sycl::global_ptr((struct RTStack*)query.opaque2).get(); return (intel_ray_flags_t) rtStack->ray[bvh_level].rayFlags; } SYCL_EXTERNAL unsigned int intel_get_ray_mask( intel_ray_query_t& query, unsigned int bvh_level) { struct RTStack* __restrict rtStack = sycl::global_ptr((struct RTStack*)query.opaque2).get(); return rtStack->ray[bvh_level].rayMask; } SYCL_EXTERNAL bool intel_is_traversal_done( intel_ray_query_t& query ) { return query.hit(intel_hit_type_potential_hit).done; } SYCL_EXTERNAL intel_candidate_type_t intel_get_hit_candidate( intel_ray_query_t& query, intel_hit_type_t hit_type) { return query.hit(hit_type).leafType == NODE_TYPE_QUAD ? 
intel_candidate_type_triangle : intel_candidate_type_procedural; } SYCL_EXTERNAL bool intel_has_committed_hit( intel_ray_query_t& query ) { return query.hit(intel_hit_type_committed_hit).valid; } level-zero-raytracing-support-1.2.3/rttrace/rttrace_validation.h000066400000000000000000000160241514453371700251640ustar00rootroot00000000000000// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wdeprecated-declarations" #pragma clang diagnostic ignored "-W#pragma-messages" #include #pragma clang diagnostic pop enum intel_ray_flags_t { intel_ray_flags_none = 0x00, intel_ray_flags_force_opaque = 0x01, // forces geometry to be opaque (no anyhit shader invokation) intel_ray_flags_force_non_opaque = 0x02, // forces geometry to be non-opqaue (invoke anyhit shader) intel_ray_flags_accept_first_hit_and_end_search = 0x04, // terminates traversal on the first hit found (shadow rays) intel_ray_flags_skip_closest_hit_shader = 0x08, // skip execution of the closest hit shader intel_ray_flags_cull_back_facing_triangles = 0x10, // back facing triangles to not produce a hit intel_ray_flags_cull_front_facing_triangles = 0x20, // front facing triangles do not produce a hit intel_ray_flags_cull_opaque = 0x40, // opaque geometry does not produce a hit intel_ray_flags_cull_non_opaque = 0x80, // non-opaque geometry does not produce a hit intel_ray_flags_skip_triangles = 0x100, // treat all triangle intersections as misses. 
intel_ray_flags_skip_procedural_primitives = 0x200, // skip execution of intersection shaders }; enum intel_hit_type_t { intel_hit_type_committed_hit = 0, intel_hit_type_potential_hit = 1, }; enum intel_raytracing_ext_flag_t { intel_raytracing_ext_flag_ray_query = 1 << 0, // true if ray queries are supported }; struct intel_float2 { float x, y; intel_float2() {} intel_float2(float x, float y) : x(x), y(y) {} intel_float2(sycl::float2 v) : x(v.x()), y(v.y()) {} operator sycl::float2() { return sycl::float2(x,y); } }; struct intel_float3 { float x, y, z; intel_float3() {} intel_float3(float x, float y, float z) : x(x), y(y), z(z) {} intel_float3(sycl::float3 v) : x(v.x()), y(v.y()), z(v.z()) {} operator sycl::float3() { return sycl::float3(x,y,z); } }; struct intel_float4x3 { intel_float3 vx, vy, vz, p; }; struct intel_ray_desc_t { intel_float3 origin; intel_float3 direction; float tmin; float tmax; unsigned int mask; intel_ray_flags_t flags; }; #include "rttrace_internal.h" // opaque types struct intel_ray_query_t { void* opaque0; void* opaque1; void* opaque2; uint32_t ctrl; uint32_t bvh_level; MemHit& hit(intel_hit_type_t ty) { struct RTStack* rtStack = (struct RTStack*) opaque2; return rtStack->hit[ty]; } }; typedef __attribute__((opencl_global )) struct intel_raytracing_acceleration_structure_opaque_t* intel_raytracing_acceleration_structure_t; // check supported ray tracing features SYCL_EXTERNAL intel_raytracing_ext_flag_t intel_get_raytracing_ext_flag(); // initializes a ray query SYCL_EXTERNAL intel_ray_query_t intel_ray_query_init( intel_ray_desc_t ray, intel_raytracing_acceleration_structure_t accel ); // setup for instance traversal using a transformed ray and bottom-level AS SYCL_EXTERNAL void intel_ray_query_forward_ray( intel_ray_query_t& query, intel_ray_desc_t ray, intel_raytracing_acceleration_structure_t accel ); // commit the potential hit SYCL_EXTERNAL void intel_ray_query_commit_potential_hit( intel_ray_query_t& query ); // commit the potential 
hit and override hit distance and UVs SYCL_EXTERNAL void intel_ray_query_commit_potential_hit_override( intel_ray_query_t& query, float override_hit_distance, intel_float2 override_uv ); // start traversal of a ray query SYCL_EXTERNAL void intel_ray_query_start_traversal( intel_ray_query_t& query ); // synchronize rayquery execution. If a ray was dispatched, // This must be called prior to calling any of the accessors below. SYCL_EXTERNAL void intel_ray_query_sync( intel_ray_query_t& query ); // signal that a ray query will not be used further. This is the moral equaivalent of a delete // this function does an implicit sync SYCL_EXTERNAL void intel_ray_query_abandon( intel_ray_query_t& query ); // read hit information during shader execution SYCL_EXTERNAL unsigned int intel_get_hit_bvh_level( intel_ray_query_t& query, intel_hit_type_t hit_type ); SYCL_EXTERNAL float intel_get_hit_distance( intel_ray_query_t& query, intel_hit_type_t hit_type ); SYCL_EXTERNAL intel_float2 intel_get_hit_barycentrics( intel_ray_query_t& query, intel_hit_type_t hit_type ); SYCL_EXTERNAL bool intel_get_hit_front_face( intel_ray_query_t& query, intel_hit_type_t hit_type ); SYCL_EXTERNAL unsigned int intel_get_hit_geometry_id(intel_ray_query_t& query, intel_hit_type_t hit_type ); SYCL_EXTERNAL unsigned int intel_get_hit_primitive_id( intel_ray_query_t& query, intel_hit_type_t hit_type ); SYCL_EXTERNAL unsigned int intel_get_hit_triangle_primitive_id( intel_ray_query_t& query, intel_hit_type_t hit_type ); // fast path for quad leaves SYCL_EXTERNAL unsigned int intel_get_hit_procedural_primitive_id( intel_ray_query_t& query, intel_hit_type_t hit_type ); // fast path for procedural leaves SYCL_EXTERNAL unsigned int intel_get_hit_instance_id( intel_ray_query_t& query, intel_hit_type_t hit_type ); SYCL_EXTERNAL unsigned int intel_get_hit_instance_user_id( intel_ray_query_t& query, intel_hit_type_t hit_type ); SYCL_EXTERNAL intel_float4x3 intel_get_hit_world_to_object( intel_ray_query_t& query, 
intel_hit_type_t hit_type ); SYCL_EXTERNAL intel_float4x3 intel_get_hit_object_to_world( intel_ray_query_t& query, intel_hit_type_t hit_type ); // fetch triangle vertices for a hit SYCL_EXTERNAL void intel_get_hit_triangle_vertices( intel_ray_query_t& query, intel_float3 vertices_out[3], intel_hit_type_t hit_type ); // Read ray-data. This is used to read transformed rays produced by HW instancing pipeline // during any-hit or intersection shader execution. SYCL_EXTERNAL intel_float3 intel_get_ray_origin( intel_ray_query_t& query, unsigned int bvh_level ); SYCL_EXTERNAL intel_float3 intel_get_ray_direction( intel_ray_query_t& query, unsigned int bvh_level ); SYCL_EXTERNAL float intel_get_ray_tmin( intel_ray_query_t& query, unsigned int bvh_level ); SYCL_EXTERNAL intel_ray_flags_t intel_get_ray_flags( intel_ray_query_t& query, unsigned int bvh_level ); SYCL_EXTERNAL unsigned int intel_get_ray_mask( intel_ray_query_t& query, unsigned int bvh_level ); // if traversal returns one can test if a triangle or procedural is hit enum intel_candidate_type_t { intel_candidate_type_triangle, intel_candidate_type_procedural }; SYCL_EXTERNAL intel_candidate_type_t intel_get_hit_candidate( intel_ray_query_t& query, intel_hit_type_t hit_type ); // test whether traversal has terminated. 
If false, the ray has reached // a procedural leaf or a non-opaque triangle leaf, and requires shader processing SYCL_EXTERNAL bool intel_is_traversal_done( intel_ray_query_t& query ); // if traversal is done one can test for the presence of a committed hit to either invoke miss or closest hit shader SYCL_EXTERNAL bool intel_has_committed_hit( intel_ray_query_t& query ); level-zero-raytracing-support-1.2.3/testing/000077500000000000000000000000001514453371700211435ustar00rootroot00000000000000level-zero-raytracing-support-1.2.3/testing/CMakeLists.txt000066400000000000000000000617101514453371700237100ustar00rootroot00000000000000## Copyright 2009-2022 Intel Corporation ## SPDX-License-Identifier: Apache-2.0 #PROJECT(rthwif_testing) #CMAKE_MINIMUM_REQUIRED(VERSION 3.1.0) SET(CMAKE_CXX_STANDARD 17) # create and install abn test file SET(ABN_TEST_FILE "${CMAKE_BINARY_DIR}/abn_metadata.json") FILE(WRITE "${ABN_TEST_FILE}" "{\n") SET(ABN_TEST_FIRST "ON") FUNCTION(WRITE_ABN_TEST) CMAKE_PARSE_ARGUMENTS(ABN_TEST "" "NAME;COMMAND;CHECK;DIR" "" ${ARGN}) IF (NOT ABN_TEST_FIRST) FILE(APPEND "${ABN_TEST_FILE}" ",\n") ENDIF() FILE(APPEND "${ABN_TEST_FILE}" " \"${ABN_TEST_NAME}\": {\n") FILE(APPEND "${ABN_TEST_FILE}" " \"api\": \"L0\",\n") GET_FILENAME_COMPONENT(DPCPP_COMPILER_NAME ${CMAKE_CXX_COMPILER} NAME_WE) IF (NOT WIN32 AND ABN_TEST_DIR) FILE(APPEND "${ABN_TEST_FILE}" " \"commandLine\": \"run_test.sh ${ABN_TEST_COMMAND}\"") ELSE() FILE(APPEND "${ABN_TEST_FILE}" " \"commandLine\": \"${ABN_TEST_COMMAND}\"") ENDIF() IF (ABN_TEST_DIR) FILE(APPEND "${ABN_TEST_FILE}" ",\n \"workingSubDir\": \"${ABN_TEST_DIR}\"") ENDIF() IF (ABN_TEST_CHECK) FILE(APPEND "${ABN_TEST_FILE}" ",\n \"functionalCheck\": [\"${ABN_TEST_CHECK}\"]") ENDIF() FILE(APPEND "${ABN_TEST_FILE}" "\n") FILE(APPEND "${ABN_TEST_FILE}" " }") ENDFUNCTION() FUNCTION(FINALIZE_ABN_TEST) FILE(APPEND "${ABN_TEST_FILE}" "\n}\n") ENDFUNCTION() #IF (WIN32) # WRITE_ABN_TEST(NAME "level-zero-gpu-raytracing-test" COMMAND "run.bat" 
CHECK "0 tests failed") # WRITE_ABN_TEST(NAME "level-zero-gpu-raytracing-test-ext" COMMAND "run_ext.bat" CHECK "0 tests failed") #ELSE() # WRITE_ABN_TEST(NAME "level-zero-gpu-raytracing-test" COMMAND "run.sh" CHECK "0 tests failed") # WRITE_ABN_TEST(NAME "level-zero-gpu-raytracing-test-ext" COMMAND "run_ext.sh" CHECK "0 tests failed") #ENDIF() IF (NOT WIN32) INSTALL(FILES "run_test.sh" DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT test PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) ENDIF() INSTALL(FILES "${ABN_TEST_FILE}" DESTINATION "${CMAKE_INSTALL_BINDIR}/.." COMPONENT test) SET(BAT_TEST_FILE_EXP "${CMAKE_BINARY_DIR}/run.bat") SET(SHELL_TEST_FILE_EXP "${CMAKE_BINARY_DIR}/run.sh") SET(BAT_TEST_FILE_EXT "${CMAKE_BINARY_DIR}/run_ext.bat") SET(SHELL_TEST_FILE_EXT "${CMAKE_BINARY_DIR}/run_ext.sh") # creation of windows batch test file FUNCTION(INIT_BAT_TEST BAT_TEST_FILE) FILE(WRITE "${BAT_TEST_FILE}" "@echo off\n") FILE(APPEND "${BAT_TEST_FILE}" "set errors=0\n") FILE(APPEND "${BAT_TEST_FILE}" "cd bin\n") FILE(APPEND "${BAT_TEST_FILE}" "\n") ENDFUNCTION() FUNCTION(ADD_BAT_TEST BAT_TEST_FILE) CMAKE_PARSE_ARGUMENTS(BAT_TEST "" "NAME" "COMMAND" ${ARGN}) LIST(POP_FRONT BAT_TEST_COMMAND CMD) LIST(PREPEND BAT_TEST_COMMAND "${CMD}.exe") STRING(REPLACE ";" " " BAT_TEST_COMMAND_STR "${BAT_TEST_COMMAND}") FILE(APPEND "${BAT_TEST_FILE}" "echo Executing test ${BAT_TEST_NAME}\n") FILE(APPEND "${BAT_TEST_FILE}" "echo ${BAT_TEST_COMMAND_STR}\n") FILE(APPEND "${BAT_TEST_FILE}" "${BAT_TEST_COMMAND_STR}\n") FILE(APPEND "${BAT_TEST_FILE}" "if %ERRORLEVEL% equ 0 (\n echo Test ${BAT_TEST_NAME} passed!\n") FILE(APPEND "${BAT_TEST_FILE}" ") else (\n echo Test ${BAT_TEST_NAME} failed!\n set /a errors=errors+1\n)\n\n") FILE(APPEND "${BAT_TEST_FILE}" "echo:\n") FILE(APPEND "${BAT_TEST_FILE}" "echo:\n") FILE(APPEND "${BAT_TEST_FILE}" "\n") ENDFUNCTION() FUNCTION(FINALIZE_BAT_TEST BAT_TEST_FILE) FILE(APPEND "${BAT_TEST_FILE}" "cd ..\n") 
FILE(APPEND "${BAT_TEST_FILE}" "echo %errors% tests failed\n") FILE(APPEND "${BAT_TEST_FILE}" "if %errors% neq 0 exit /b 1\n") ENDFUNCTION() # creation of Linux batch test file FUNCTION(INIT_SHELL_TEST SHELL_TEST_FILE) FILE(WRITE "${SHELL_TEST_FILE}" "#!/bin/bash\n") FILE(APPEND "${SHELL_TEST_FILE}" "errors=0\n") FILE(APPEND "${SHELL_TEST_FILE}" "cd bin\n") GET_FILENAME_COMPONENT(DPCPP_COMPILER_NAME ${CMAKE_CXX_COMPILER} NAME_WE) IF (DPCPP_COMPILER_NAME STREQUAL "icpx") FILE(APPEND "${SHELL_TEST_FILE}" "export LD_LIBRARY_PATH=\$\{LD_LIBRARY_PATH\}:.\n") # required as workaround for ICX ENDIF() FILE(APPEND "${SHELL_TEST_FILE}" "\n") ENDFUNCTION() FUNCTION(ADD_SHELL_TEST SHELL_TEST_FILE) CMAKE_PARSE_ARGUMENTS(SHELL_TEST "" "NAME" "COMMAND" ${ARGN}) LIST(POP_FRONT SHELL_TEST_COMMAND CMD) LIST(PREPEND SHELL_TEST_COMMAND "./${CMD}") STRING(REPLACE ";" " " SHELL_TEST_COMMAND_STR "${SHELL_TEST_COMMAND}") FILE(APPEND "${SHELL_TEST_FILE}" "echo Executing test ${SHELL_TEST_NAME}\n") FILE(APPEND "${SHELL_TEST_FILE}" "echo ${SHELL_TEST_COMMAND_STR}\n") FILE(APPEND "${SHELL_TEST_FILE}" "${SHELL_TEST_COMMAND_STR}\n") FILE(APPEND "${SHELL_TEST_FILE}" "if (($? == 0)); then\n echo Test ${SHELL_TEST_NAME} passed!\n") FILE(APPEND "${SHELL_TEST_FILE}" "else\n echo Test ${SHELL_TEST_NAME} failed!\n ((errors++))\n") FILE(APPEND "${SHELL_TEST_FILE}" "fi\n") FILE(APPEND "${SHELL_TEST_FILE}" "echo\n") FILE(APPEND "${SHELL_TEST_FILE}" "echo\n") FILE(APPEND "${SHELL_TEST_FILE}" "\n") ENDFUNCTION() FUNCTION(FINALIZE_SHELL_TEST SHELL_TEST_FILE) FILE(APPEND "${SHELL_TEST_FILE}" "cd ..\n") FILE(APPEND "${SHELL_TEST_FILE}" "echo $errors tests failed\n") FILE(APPEND "${SHELL_TEST_FILE}" "if (($errors != 0)); then exit 1; fi\n") ENDFUNCTION() # install windows batch test file or linux shell script IF (WIN32) INSTALL(FILES "${BAT_TEST_FILE_EXP}" DESTINATION "${CMAKE_INSTALL_BINDIR}/.." COMPONENT test) INSTALL(FILES "${BAT_TEST_FILE_EXT}" DESTINATION "${CMAKE_INSTALL_BINDIR}/.." 
COMPONENT test) ELSE() INSTALL(FILES "${SHELL_TEST_FILE_EXP}" DESTINATION "${CMAKE_INSTALL_BINDIR}/.." COMPONENT test PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) INSTALL(FILES "${SHELL_TEST_FILE_EXT}" DESTINATION "${CMAKE_INSTALL_BINDIR}/.." COMPONENT test PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) ENDIF() # install ctest file SET(CTEST_TEST_FILE_ROOT "${CMAKE_BINARY_DIR}/CTestTestfile.root") file(WRITE "${CTEST_TEST_FILE_ROOT}" "# CTestTestfile.cmake self generated for package testing\n") file(APPEND "${CTEST_TEST_FILE_ROOT}" "subdirs(\"bin\")\n") INSTALL(FILES "${CTEST_TEST_FILE_ROOT}" DESTINATION "${CMAKE_INSTALL_BINDIR}/.." RENAME "CTestTestfile.cmake" COMPONENT test) SET(CMAKE_INSTALL_RPATH "$ORIGIN/../${CMAKE_INSTALL_LIBDIR}") SET(CTEST_TEST_FILE "${CMAKE_BINARY_DIR}/CTestTestfile.install") file(WRITE "${CTEST_TEST_FILE}" "# CTestTestfile.cmake self generated for package testing\n") FUNCTION(ADD_CTEST_TEST) CMAKE_PARSE_ARGUMENTS(ADD_CTEST_TEST "" "NAME" "COMMAND" ${ARGN}) STRING(REPLACE ";" " " ADD_CTEST_TEST_COMMAND_STR "${ADD_CTEST_TEST_COMMAND}") FILE(APPEND "${CTEST_TEST_FILE}" "add_test(${ADD_CTEST_TEST_NAME} ./${ADD_CTEST_TEST_COMMAND_STR})\n") ADD_TEST(NAME ${ADD_CTEST_TEST_NAME} COMMAND ${ADD_CTEST_TEST_COMMAND} WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) ENDFUNCTION() FUNCTION(ADD_ABN_TEST) CMAKE_PARSE_ARGUMENTS(SHELL_TEST "" "NAME;EXT" "COMMAND" ${ARGN}) LIST(POP_FRONT SHELL_TEST_COMMAND CMD) LIST(PREPEND SHELL_TEST_COMMAND "./${CMD}") STRING(REPLACE ";" " " SHELL_TEST_COMMAND_STR "${SHELL_TEST_COMMAND}") WRITE_ABN_TEST(NAME "level-zero-gpu-raytracing-${SHELL_TEST_NAME}${SHELL_TEST_EXT}" COMMAND ${SHELL_TEST_COMMAND_STR} CHECK "PASSED" DIR "bin") ENDFUNCTION() #FUNCTION(MY_ADD_TEST) # ADD_CTEST_TEST(${ARGN}) # ADD_BAT_TEST(${BAT_TEST_FILE_EXP} ${ARGN}) # ADD_SHELL_TEST(${SHELL_TEST_FILE_EXP} ${ARGN}) #ENDFUNCTION() #FUNCTION(MY_ADD_TEST_EXT) # 
ADD_CTEST_TEST(${ARGN}) # ADD_BAT_TEST(${BAT_TEST_FILE_EXT} ${ARGN}) # ADD_SHELL_TEST(${SHELL_TEST_FILE_EXT} ${ARGN}) #ENDFUNCTION() FUNCTION(ADD_INDIVIDUAL_TEST) CMAKE_PARSE_ARGUMENTS(ADD_INDIVIDUAL_TEST "C;G;S32" "" "" ${ARGN}) IF (ADD_INDIVIDUAL_TEST_C) ADD_CTEST_TEST(${ARGN}) ENDIF() IF (ADD_INDIVIDUAL_TEST_G) ADD_ABN_TEST(${ARGN}) ENDIF() IF (ADD_INDIVIDUAL_TEST_S32) ADD_ABN_TEST(EXT _simd32 ${ARGN}) ENDIF() ENDFUNCTION() GET_FILENAME_COMPONENT(SYCL_COMPILER_DIR ${CMAKE_CXX_COMPILER} PATH) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -isystem \"${SYCL_COMPILER_DIR}/../include/sycl\" -isystem \"${SYCL_COMPILER_DIR}/../include/\"") # disable warning from SYCL header (FIXME: why required?) IF (ZE_RAYTRACING_RT_SIMULATION) SET(RT_SIM_LIBRARY rtcore) ADD_COMPILE_DEFINITIONS(ZE_RAYTRACING_RT_SIMULATION) ENDIF() ADD_EXECUTABLE(embree_rthwif_cornell_box rthwif_cornell_box.cpp) TARGET_LINK_LIBRARIES(embree_rthwif_cornell_box sys simd tbb ze_wrapper ${RT_SIM_LIBRARY} ${EMBREE_RTHWIF_SYCL}) SET_PROPERTY(TARGET embree_rthwif_cornell_box APPEND PROPERTY COMPILE_FLAGS "-fsycl -fsycl-targets=spir64") SET_PROPERTY(TARGET embree_rthwif_cornell_box APPEND PROPERTY LINK_FLAGS "-fsycl -fsycl-targets=spir64 -Xsycl-target-backend=spir64 \" -cl-intel-greater-than-4GB-buffer-required \"") TARGET_COMPILE_DEFINITIONS(embree_rthwif_cornell_box PUBLIC EMBREE_LEVEL_ZERO ZE_RAYTRACING) ADD_CUSTOM_COMMAND(TARGET embree_rthwif_cornell_box POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/cornell_box_reference.tga" "$") ADD_EXECUTABLE(embree_rthwif_test rthwif_test.cpp) TARGET_LINK_LIBRARIES(embree_rthwif_test sys simd tbb ze_wrapper ${RT_SIM_LIBRARY} ${EMBREE_RTHWIF_SYCL}) SET_PROPERTY(TARGET embree_rthwif_test APPEND PROPERTY COMPILE_FLAGS "-fsycl -fsycl-targets=spir64") SET_PROPERTY(TARGET embree_rthwif_test APPEND PROPERTY LINK_FLAGS "-fsycl -fsycl-targets=spir64 -Xsycl-target-backend=spir64 \" -cl-intel-greater-than-4GB-buffer-required \"") 
TARGET_COMPILE_DEFINITIONS(embree_rthwif_test PUBLIC EMBREE_LEVEL_ZERO ZE_RAYTRACING) ADD_EXECUTABLE(embree_rthwif_cornell_box_ext rthwif_cornell_box_ext.cpp) TARGET_LINK_LIBRARIES(embree_rthwif_cornell_box_ext sys simd tbb ze_wrapper ${RT_SIM_LIBRARY} ${EMBREE_RTHWIF_SYCL}) SET_PROPERTY(TARGET embree_rthwif_cornell_box_ext APPEND PROPERTY COMPILE_FLAGS "-fsycl -fsycl-targets=spir64") SET_PROPERTY(TARGET embree_rthwif_cornell_box_ext APPEND PROPERTY LINK_FLAGS "-fsycl -fsycl-targets=spir64 -Xsycl-target-backend=spir64 \" -cl-intel-greater-than-4GB-buffer-required \"") TARGET_COMPILE_DEFINITIONS(embree_rthwif_cornell_box_ext PUBLIC EMBREE_LEVEL_ZERO ZE_RAYTRACING) #ADD_CUSTOM_COMMAND(TARGET embree_rthwif_cornell_box_ext POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/cornell_box_reference.tga" "$") ADD_EXECUTABLE(embree_rthwif_test_ext rthwif_test_ext.cpp) TARGET_LINK_LIBRARIES(embree_rthwif_test_ext sys simd tbb ze_wrapper ${RT_SIM_LIBRARY} ${EMBREE_RTHWIF_SYCL}) SET_PROPERTY(TARGET embree_rthwif_test_ext APPEND PROPERTY COMPILE_FLAGS "-fsycl -fsycl-targets=spir64") SET_PROPERTY(TARGET embree_rthwif_test_ext APPEND PROPERTY LINK_FLAGS "-fsycl -fsycl-targets=spir64 -Xsycl-target-backend=spir64 \" -cl-intel-greater-than-4GB-buffer-required \"") TARGET_COMPILE_DEFINITIONS(embree_rthwif_test_ext PUBLIC EMBREE_LEVEL_ZERO ZE_RAYTRACING) IF (ZE_RAYTRACING_SYCL_TESTS STREQUAL "INTERNAL_RTAS_BUILDER") SET(RTAS_BUILDER_MODE "--internal-rtas-builder") ELSEIF (ZE_RAYTRACING_SYCL_TESTS STREQUAL "LEVEL_ZERO_RTAS_BUILDER") SET(RTAS_BUILDER_MODE "--level-zero-rtas-builder") ELSE() MESSAGE(FATAL_ERROR "invalid test mode") ENDIF() # EXP API version tests INIT_BAT_TEST(${BAT_TEST_FILE_EXP}) INIT_SHELL_TEST(${SHELL_TEST_FILE_EXP}) ADD_INDIVIDUAL_TEST(C G NAME cornell_box_exp COMMAND embree_rthwif_cornell_box ${RTAS_BUILDER_MODE} --compare cornell_box_reference.tga) SET(ABN_TEST_FIRST "OFF") ADD_INDIVIDUAL_TEST(C G NAME cornell_box_instance_exp COMMAND 
embree_rthwif_cornell_box ${RTAS_BUILDER_MODE} --compare cornell_box_reference.tga --instance) IF (NOT ZE_RAYTRACING_RT_SIMULATION) ADD_INDIVIDUAL_TEST(C G NAME cornell_box_device_memory_exp COMMAND embree_rthwif_cornell_box ${RTAS_BUILDER_MODE} --compare cornell_box_reference.tga --instance --device-memory) ENDIF() ADD_INDIVIDUAL_TEST(C G NAME builder_triangles_expected_exp COMMAND embree_rthwif_test ${RTAS_BUILDER_MODE} --build_test_triangles --build_mode_expected) ADD_INDIVIDUAL_TEST(C G NAME builder_procedurals_expected_exp COMMAND embree_rthwif_test ${RTAS_BUILDER_MODE} --build_test_procedurals --build_mode_expected) ADD_INDIVIDUAL_TEST(C G NAME builder_instances_expected_exp COMMAND embree_rthwif_test ${RTAS_BUILDER_MODE} --build_test_instances --build_mode_expected) ADD_INDIVIDUAL_TEST(C G NAME builder_mixed_expected_exp COMMAND embree_rthwif_test ${RTAS_BUILDER_MODE} --build_test_mixed --build_mode_expected) ADD_INDIVIDUAL_TEST(C NAME benchmark_triangles_exp COMMAND embree_rthwif_test ${RTAS_BUILDER_MODE} --benchmark_triangles) ADD_INDIVIDUAL_TEST(C NAME benchmark_procedurals_exp COMMAND embree_rthwif_test ${RTAS_BUILDER_MODE} --benchmark_procedurals) ADD_INDIVIDUAL_TEST(C G NAME builder_triangles_worst_case_exp COMMAND embree_rthwif_test ${RTAS_BUILDER_MODE} --build_test_triangles --build_mode_worst_case) ADD_INDIVIDUAL_TEST(C G NAME builder_procedurals_worst_case_exp COMMAND embree_rthwif_test ${RTAS_BUILDER_MODE} --build_test_procedurals --build_mode_worst_case) ADD_INDIVIDUAL_TEST(C G NAME builder_instances_worst_case_exp COMMAND embree_rthwif_test ${RTAS_BUILDER_MODE} --build_test_instances --build_mode_worst_case) ADD_INDIVIDUAL_TEST(C G NAME builder_mixed_worst_case_exp COMMAND embree_rthwif_test ${RTAS_BUILDER_MODE} --build_test_mixed --build_mode_worst_case) ADD_INDIVIDUAL_TEST(C G NAME triangles_committed_hit_exp COMMAND embree_rthwif_test ${RTAS_BUILDER_MODE} --no-instancing --triangles-committed-hit) ADD_INDIVIDUAL_TEST(C G NAME 
triangles_potential_hit_exp COMMAND embree_rthwif_test ${RTAS_BUILDER_MODE} --no-instancing --triangles-potential-hit) ADD_INDIVIDUAL_TEST(C G NAME triangles_anyhit_shader_commit_exp COMMAND embree_rthwif_test ${RTAS_BUILDER_MODE} --no-instancing --triangles-anyhit-shader-commit) ADD_INDIVIDUAL_TEST(C G NAME triangles_anyhit_shader_reject_exp COMMAND embree_rthwif_test ${RTAS_BUILDER_MODE} --no-instancing --triangles-anyhit-shader-reject) ADD_INDIVIDUAL_TEST(C G NAME procedurals_committed_hit_exp COMMAND embree_rthwif_test ${RTAS_BUILDER_MODE} --no-instancing --procedurals-committed-hit) ADD_INDIVIDUAL_TEST(C G NAME hwinstancing_triangles_committed_hit_exp COMMAND embree_rthwif_test ${RTAS_BUILDER_MODE} --hw-instancing --triangles-committed-hit) ADD_INDIVIDUAL_TEST(C G NAME hwinstancing_triangles_potential_hit_exp COMMAND embree_rthwif_test ${RTAS_BUILDER_MODE} --hw-instancing --triangles-potential-hit) ADD_INDIVIDUAL_TEST(C G NAME hwinstancing_triangles_anyhit_shader_commit_exp COMMAND embree_rthwif_test ${RTAS_BUILDER_MODE} --hw-instancing --triangles-anyhit-shader-commit) ADD_INDIVIDUAL_TEST(C G NAME hwinstancing_triangles_anyhit_shader_reject_exp COMMAND embree_rthwif_test ${RTAS_BUILDER_MODE} --hw-instancing --triangles-anyhit-shader-reject) ADD_INDIVIDUAL_TEST(C G NAME hwinstancing_procedurals_committed_hit_exp COMMAND embree_rthwif_test ${RTAS_BUILDER_MODE} --hw-instancing --procedurals-committed-hit) ADD_INDIVIDUAL_TEST(C G NAME swinstancing_triangles_committed_hit_exp COMMAND embree_rthwif_test ${RTAS_BUILDER_MODE} --sw-instancing --triangles-committed-hit) ADD_INDIVIDUAL_TEST(C G NAME swinstancing_triangles_potential_hit_exp COMMAND embree_rthwif_test ${RTAS_BUILDER_MODE} --sw-instancing --triangles-potential-hit) ADD_INDIVIDUAL_TEST(C G NAME swinstancing_triangles_anyhit_shader_commit_exp COMMAND embree_rthwif_test ${RTAS_BUILDER_MODE} --sw-instancing --triangles-anyhit-shader-commit) ADD_INDIVIDUAL_TEST(C G NAME 
swinstancing_triangles_anyhit_shader_reject_exp COMMAND embree_rthwif_test ${RTAS_BUILDER_MODE} --sw-instancing --triangles-anyhit-shader-reject) ADD_INDIVIDUAL_TEST(C G NAME swinstancing_procedurals_committed_hit_exp COMMAND embree_rthwif_test ${RTAS_BUILDER_MODE} --sw-instancing --procedurals-committed-hit) FINALIZE_BAT_TEST(${BAT_TEST_FILE_EXP}) FINALIZE_SHELL_TEST(${SHELL_TEST_FILE_EXP}) # EXT API version tests INIT_BAT_TEST(${BAT_TEST_FILE_EXT}) INIT_SHELL_TEST(${SHELL_TEST_FILE_EXT}) ADD_INDIVIDUAL_TEST(C G NAME cornell_box_ext COMMAND embree_rthwif_cornell_box_ext ${RTAS_BUILDER_MODE} --compare cornell_box_reference.tga) ADD_INDIVIDUAL_TEST(C G NAME cornell_box_instance_ext COMMAND embree_rthwif_cornell_box_ext ${RTAS_BUILDER_MODE} --compare cornell_box_reference.tga --instance) IF (NOT ZE_RAYTRACING_RT_SIMULATION) ADD_INDIVIDUAL_TEST(C G NAME cornell_box_device_memory_ext COMMAND embree_rthwif_cornell_box_ext ${RTAS_BUILDER_MODE} --compare cornell_box_reference.tga --instance --device-memory) ENDIF() ADD_INDIVIDUAL_TEST(C G NAME builder_triangles_expected_ext COMMAND embree_rthwif_test_ext ${RTAS_BUILDER_MODE} --build_test_triangles --build_mode_expected) ADD_INDIVIDUAL_TEST(C G NAME builder_procedurals_expected_ext COMMAND embree_rthwif_test_ext ${RTAS_BUILDER_MODE} --build_test_procedurals --build_mode_expected) ADD_INDIVIDUAL_TEST(C G NAME builder_instances_expected_ext COMMAND embree_rthwif_test_ext ${RTAS_BUILDER_MODE} --build_test_instances --build_mode_expected) ADD_INDIVIDUAL_TEST(C G NAME builder_mixed_expected_ext COMMAND embree_rthwif_test_ext ${RTAS_BUILDER_MODE} --build_test_mixed --build_mode_expected) ADD_INDIVIDUAL_TEST(C NAME benchmark_triangles_ext COMMAND embree_rthwif_test_ext ${RTAS_BUILDER_MODE} --benchmark_triangles) ADD_INDIVIDUAL_TEST(C NAME benchmark_procedurals_ext COMMAND embree_rthwif_test_ext ${RTAS_BUILDER_MODE} --benchmark_procedurals) ADD_INDIVIDUAL_TEST(C G NAME builder_triangles_worst_case_ext COMMAND 
embree_rthwif_test_ext ${RTAS_BUILDER_MODE} --build_test_triangles --build_mode_worst_case) ADD_INDIVIDUAL_TEST(C G NAME builder_procedurals_worst_case_ext COMMAND embree_rthwif_test_ext ${RTAS_BUILDER_MODE} --build_test_procedurals --build_mode_worst_case) ADD_INDIVIDUAL_TEST(C G NAME builder_instances_worst_case_ext COMMAND embree_rthwif_test_ext ${RTAS_BUILDER_MODE} --build_test_instances --build_mode_worst_case) ADD_INDIVIDUAL_TEST(C G NAME builder_mixed_worst_case_ext COMMAND embree_rthwif_test_ext ${RTAS_BUILDER_MODE} --build_test_mixed --build_mode_worst_case) ADD_INDIVIDUAL_TEST(C G S32 NAME triangles_committed_hit_ext COMMAND embree_rthwif_test_ext ${RTAS_BUILDER_MODE} --no-instancing --triangles-committed-hit) ADD_INDIVIDUAL_TEST(C G S32 NAME triangles_potential_hit_ext COMMAND embree_rthwif_test_ext ${RTAS_BUILDER_MODE} --no-instancing --triangles-potential-hit) ADD_INDIVIDUAL_TEST(C G S32 NAME triangles_anyhit_shader_commit_ext COMMAND embree_rthwif_test_ext ${RTAS_BUILDER_MODE} --no-instancing --triangles-anyhit-shader-commit) ADD_INDIVIDUAL_TEST(C G S32 NAME triangles_anyhit_shader_reject_ext COMMAND embree_rthwif_test_ext ${RTAS_BUILDER_MODE} --no-instancing --triangles-anyhit-shader-reject) ADD_INDIVIDUAL_TEST(C G S32 NAME procedurals_committed_hit_ext COMMAND embree_rthwif_test_ext ${RTAS_BUILDER_MODE} --no-instancing --procedurals-committed-hit) ADD_INDIVIDUAL_TEST(C G S32 NAME hwinstancing_triangles_committed_hit_ext COMMAND embree_rthwif_test_ext ${RTAS_BUILDER_MODE} --hw-instancing --triangles-committed-hit) ADD_INDIVIDUAL_TEST(C G S32 NAME hwinstancing_triangles_potential_hit_ext COMMAND embree_rthwif_test_ext ${RTAS_BUILDER_MODE} --hw-instancing --triangles-potential-hit) ADD_INDIVIDUAL_TEST(C G S32 NAME hwinstancing_triangles_anyhit_shader_commit_ext COMMAND embree_rthwif_test_ext ${RTAS_BUILDER_MODE} --hw-instancing --triangles-anyhit-shader-commit) ADD_INDIVIDUAL_TEST(C G S32 NAME hwinstancing_triangles_anyhit_shader_reject_ext COMMAND 
embree_rthwif_test_ext ${RTAS_BUILDER_MODE} --hw-instancing --triangles-anyhit-shader-reject) ADD_INDIVIDUAL_TEST(C G S32 NAME hwinstancing_procedurals_committed_hit_ext COMMAND embree_rthwif_test_ext ${RTAS_BUILDER_MODE} --hw-instancing --procedurals-committed-hit) ADD_INDIVIDUAL_TEST(C G S32 NAME swinstancing_triangles_committed_hit_ext COMMAND embree_rthwif_test_ext ${RTAS_BUILDER_MODE} --sw-instancing --triangles-committed-hit) ADD_INDIVIDUAL_TEST(C G S32 NAME swinstancing_triangles_potential_hit_ext COMMAND embree_rthwif_test_ext ${RTAS_BUILDER_MODE} --sw-instancing --triangles-potential-hit) ADD_INDIVIDUAL_TEST(C G S32 NAME swinstancing_triangles_anyhit_shader_commit_ext COMMAND embree_rthwif_test_ext ${RTAS_BUILDER_MODE} --sw-instancing --triangles-anyhit-shader-commit) ADD_INDIVIDUAL_TEST(C G S32 NAME swinstancing_triangles_anyhit_shader_reject_ext COMMAND embree_rthwif_test_ext ${RTAS_BUILDER_MODE} --sw-instancing --triangles-anyhit-shader-reject) ADD_INDIVIDUAL_TEST(C G S32 NAME swinstancing_procedurals_committed_hit_ext COMMAND embree_rthwif_test_ext ${RTAS_BUILDER_MODE} --sw-instancing --procedurals-committed-hit) FINALIZE_ABN_TEST() FINALIZE_BAT_TEST(${BAT_TEST_FILE_EXT}) FINALIZE_SHELL_TEST(${SHELL_TEST_FILE_EXT}) INSTALL(TARGETS embree_rthwif_cornell_box RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT test) INSTALL(TARGETS embree_rthwif_cornell_box_ext RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT test) INSTALL(FILES "${CMAKE_CURRENT_SOURCE_DIR}/cornell_box_reference.tga" DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT test) INSTALL(TARGETS embree_rthwif_test RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT test) INSTALL(TARGETS embree_rthwif_test_ext RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT test) INSTALL(FILES "${CTEST_TEST_FILE}" DESTINATION "${CMAKE_INSTALL_BINDIR}" RENAME "CTestTestfile.cmake" COMPONENT test) ############################################################## # Install SYCL specific files 
############################################################## IF (ZE_RAYTRACING_SYCL_TESTS) GET_FILENAME_COMPONENT(DPCPP_COMPILER_DIR ${CMAKE_CXX_COMPILER} PATH) GET_FILENAME_COMPONENT(DPCPP_COMPILER_NAME ${CMAKE_CXX_COMPILER} NAME_WE) IF (WIN32) FILE(GLOB_RECURSE LIB_SYCL_LIB_FILES "${DPCPP_COMPILER_DIR}/../lib/sycl?.lib") IF (NOT LIB_SYCL_LIB_FILES) SET(LIB_SYCL_LIB_FILES "${DPCPP_COMPILER_DIR}/../lib/sycl?.lib") ENDIF() INSTALL(FILES ${LIB_SYCL_LIB_FILES} DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib) FILE(GLOB_RECURSE LIB_SYCL_DLL_FILES "${DPCPP_COMPILER_DIR}/../bin/sycl?.dll") IF (NOT LIB_SYCL_DLL_FILES) SET(LIB_SYCL_DLL_FILES "${DPCPP_COMPILER_DIR}/../bin/sycl?.dll") ENDIF() INSTALL(FILES ${LIB_SYCL_DLL_FILES} DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib) INSTALL(FILES "${DPCPP_COMPILER_DIR}/../bin/pi_level_zero.dll" DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib) INSTALL(FILES "${DPCPP_COMPILER_DIR}/../bin/pi_win_proxy_loader.dll" DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib) ELSE() IF (DPCPP_COMPILER_NAME STREQUAL "clang++") FILE(GLOB_RECURSE LIB_SYCL_FILES "${DPCPP_COMPILER_DIR}/../lib/libsycl.so.*") INSTALL(FILES ${LIB_SYCL_FILES} DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib) INSTALL(FILES "${DPCPP_COMPILER_DIR}/../lib/libpi_level_zero.so" DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib) ELSE() FILE(GLOB_RECURSE LIB_SYCL_FILES "${DPCPP_COMPILER_DIR}/../lib/libsycl.so.*") INSTALL(FILES ${LIB_SYCL_FILES} DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib) FILE(GLOB_RECURSE LIB_URLOADER_FILES "${DPCPP_COMPILER_DIR}/../lib/libur_loader.so.*") INSTALL(FILES ${LIB_URLOADER_FILES} DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib) FILE(GLOB_RECURSE LIB_URADAPTER_FILES "${DPCPP_COMPILER_DIR}/../lib/libur_adapter_level_zero.so.*") INSTALL(FILES ${LIB_URADAPTER_FILES} DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib) FILE(GLOB_RECURSE LIB_HWLOC_FILES "${DPCPP_COMPILER_DIR}/../lib/libhwloc.so.*") FOREACH(so_file 
${LIB_HWLOC_FILES}) # In newer versions of CMake FOLLOW_SYMLINKS can be used instead of this mechanism GET_FILENAME_COMPONENT(name "${so_file}" NAME) GET_FILENAME_COMPONENT(real_file "${so_file}" REALPATH) INSTALL(FILES "${real_file}" DESTINATION "${CMAKE_INSTALL_BINDIR}" RENAME "${name}" COMPONENT lib) ENDFOREACH() FILE(GLOB_RECURSE LIB_UMF_FILES "${DPCPP_COMPILER_DIR}/../lib/libumf.so.*" "${DPCPP_COMPILER_DIR}/../../../umf/latest/libumf.so.*") INSTALL(FILES ${LIB_UMF_FILES} DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib) INSTALL(FILES "${DPCPP_COMPILER_DIR}/../lib/libsvml.so" DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib) INSTALL(FILES "${DPCPP_COMPILER_DIR}/../lib/libirng.so" DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib) INSTALL(FILES "${DPCPP_COMPILER_DIR}/../lib/libimf.so" DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib) INSTALL(FILES "${DPCPP_COMPILER_DIR}/../lib/libintlc.so.5" DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib) INSTALL(FILES "${DPCPP_COMPILER_DIR}/../lib/libiomp5.so" DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib) IF (EXISTS "${DPCPP_COMPILER_DIR}/../lib/libpi_level_zero.so") INSTALL(FILES "${DPCPP_COMPILER_DIR}/../lib/libpi_level_zero.so" DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT lib) ENDIF() ENDIF() ENDIF() ENDIF() level-zero-raytracing-support-1.2.3/testing/cornell_box.tga000066400000000000000000030000221514453371700241430ustar00rootroot00000000000000                                                          ! ! " " # # $ $ % % & & ' ' ( ( ) ) * * + + , , - - . . / / 0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 : : ; ; < < = = > > ? ? 
@ @ A A B B C C D D D E E F F G G H H I I J J K K L L M M N N O O P P Q Q R R S S T T U U V V W W X X Y Y Z Z [ [ \ \ ] ] ^ ^ _ _ ` ` a a b b c c d d e e f f g g h h i i j j k k l l m m n n n o o p p q q r r s s t t u~ u~ v} v} w| w| x{ x{ yz yz zy zy {x {x |w |w }v }v ~u ~u t t s s r r q q p p o o n n m m l l k k j j i i h h g g f f e e d d c c b b a a ` ` _ _ ^ ^ ] ] \ \ [ [ Z Z Y Y X X W W V V U U U T T S S R R Q Q P P O O N N M M L L K K J J I I H H G G F F E E D D C C B B A A @ @ ? ? > > = = < < ; ; : : 9 9 8 8 7 7 6 6 5 5 4 4 3 3 2 2 1 1 0 0 / / . . - - , , + + + * * ) ) ( ( ' ' & & % % $ $ # # " " ! !                                                                                                                   ! ! " " # # $ $ % % & & ' ' ( ( ) ) * * + + , , - - . . . / / 0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 : : ; ; < < = = > > ? ? @ @ A A B B C C D D E E F F G G H H I I J J K K L L M M N N O O P P Q Q R R S S T T U U V V W W X X Y Y Z Z [ [ \ \ ] ] ^ ^ _ _ ` ` a a b b c c d d e e f f g g h h i i j j k k l l m m m n n o o p p q q r r s s~ t~ t} u} u| v| v{ w{ wz xz xy yy yx zx zw {w {v |v |u }u }t ~t ~s s r r q q p p o o n n m m l l k k j j i i h h g g f f e e d d c c b b a a ` ` _ _ ^ ^ ] ] \ \ [ [ Z Z Y Y X X W W V V U U T T S S R R Q Q P P O O N N M M L L K K J J I I H H G G F F E E D D C C B B A A @ @ @ ? ? > > = = < < ; ; : : 9 9 8 8 7 7 6 6 5 5 4 4 3 3 2 2 1 1 0 0 / / . . - - , , + + * * ) ) ( ( ' ' & & % % $ $ # # " " ! !                                                                                  !!""##$$%%&&''(())**++,,--..//00112233445566778899::;;<<==>>??@@AABBCCDDEEFFGGHHIIJJKKLLMMNNOOPPQQRRSSTTUUVVWWXXYYZZ[[\\]]^^__``aabbccddeeffgghhiiijjkkllmmnnooppqqr~r~s}s}t|t|u{u{vzvzwywyxxxxywywzvzv{u{u|t|t}s}s~r~rqqppoonnmmllkkjjiihhggffeeddccbbaa``__^^]]\\[[ZZYYXXWWVVUUTTSSRRQQPPOONNMMLLKKJJIIHHGGFFEEDDCCBBAA@@??>>==<<;;::99887766554433221100//..--,,++**))((''&&%%$$##""!!                                            
!!""##$$%%&&''(())**++,,--..//00112233445566778899::;;<<==>>??@@AABBCCDDEEFFGGHHIIJJKKLLMMNNOOPPQQRRSSTTUUVVWWXXYYZZ[[\\]]^^__``aabbccddeeffgghhiijjkkllmmnnoopp~q~q}r}r|s|s{t{tzuzuyvyvxwxwwxwxvyvyuzuzt{t{s|s|r}r}q~q~ppoonnmmllkkjjiihhggffeeddccbbaa``__^^]]\\[[ZZYYXXWWVVUUTTSSRRQQPPOONNMMLLKKJJIIHHGGFFEEDDCCBBAA@@??>>==<<;;::99887766554433221100//..--,,++**))((''&&%%$$##""!!                                            !!""##$$%%&&''(())**++,,--..//00112233445566778899::;;<<==>>??@@AABBCCDDEEFFGGHHIIJJKKLLMMNNOOPPQQRRSSTTUUVVWWXXYYZZ[[\\]]^^__``aabbccddeeffgghhiijjkkllmmnn~o~o}p}p|q|q{r{rzszsytytxuxuwvwvvwvwuxuytytzszs{r{r|q|q}p}p~o~onnmmllkkjjiihhggffeeddccbbaa``__^^]]\\[[ZZYYXXWWVVUUTTSSRRQQPPOONNMMLLKKJJIIHHGGFFEEDDCCBBAA@@??>>==<<;;::99887766554433221100//..--,,++**))((''&&%%$$##""!!                                          !!""##$$%%&&''(())**++,,--..//0011223345566778899::;;<<==>>??@@AABBCCDDEEFFGGHHIIJJKKLLMMNNOOPPQQRRSSTTUUVVWWXXYYZZ[[\\]]^^__``aabbccddeeffgghhiijjkkllm~m~n}n}o|o|p{p{qzqzryrysxsxtwuwuvvvvuwuwtxtxsysyrzrzq{q{p|p|o}o}n~n~mmllkkjjiihhggffeeddccbbaa``__^^]]\\[[ZZYYXXWWVVUUTTSSRRQQPPOONNMMLLKKJJIIHHGGFFEEDDCCBBAA@@?>>==<<;;::99887766554433221100//..--,,++**))((''&&%%$$##""!!                                    !!""##$$%%&&''(())**++,,--..//00112233445566778899::;;<<==>>??@@AABBCCDDEEFFGGHIIJJKKLLMMNNOOPPQQRRSSTTUUVVWWXXYYZZ[[\\]]^^__``aabbccddeeffgghhiijjkk~l~l}m}m|n|n{o{ozpzpyqyqxrxrwswtvtvuuuuvtvtwswsxrxryqyqzpzp{o{o|n|n}m}m~l~lkkjjiihhggffeeddccbbaa``__^^]]\\[[ZZYYXXWWVVUTTSSRRQQPPOONNMMLLKKJJIIHHGGFFEEDDCCBBAA@@??>>==<<;;::99887766554433221100//..--,,++*))((''&&%%$$##""!!                              
!!""##$$%%&&''(())**++,,--..//0011223445566778899::;;<<==>>??@@AABBCCDDEEFFGGHHIIJJKKLLMMNNOOPPQQRSSTTUUVVWWXXYYZZ[[\\]]^^__``aabbccddeeffgghhiij~j~k}k}l|l|m{m{nznzoyoypxpxqwqwrvsvsututtutusvsvrwrwqxqxpypyozozn{n{m|m|l}l}k~k~jjiihhggffeeddccbbaa``_^^]]\\[[ZZYYXXWWVVUUTTSSRRQQPPOONNMMLLKKJJIIHHGGFFEEDDCCBBAA@@?>>==<<;;::99887766554433221100//..--,,++**))((''&&%%$$##""!!                        !!""##$$%%&''(())**++,,--..//00112233445566778899::;;<<==>>?@@AABBCCDDEEFFGGHHIIJJKKLLMMNNOOPPQQRRSSTTUUVVWWXYYZZ[[\\]]^^__``aabbccddeeffgghh~i~i}j}j|k|k{l{lzmzmynynxoxowpwpvqvquruststtstsururvqvqwpwpxoxoynynzmzm{l{l|k|k}j}j~i~ihhggfeeddccbbaa``__^^]]\\[[ZZYYXXWWVVUUTTSSRRQQPPOONNMMLKKJJIIHHGGFFEEDDCCBBAA@@??>>==<<;;::9988776655443221100//..--,,++**))((''&&%%$$##""!!                               !!""##$$%%&&''(())**++,,--..//0011233445566778899::;;<<==>>??@@AABBCCDDEEFFGHHIIJJKKLLMMNNOOPPQQRRSSTTUUVVWWXXYYZZ[[\]]^^__``aabbccddeeffg~g~h}h}i|i|j{j{kzkzlylymxmxnwnwovovpupuqtqtrssssrtrtququpvpvowownxnxmymylzlzk{k{j|i|i}h}h~g~gffeeddccbbaa``__^^]]\\[[ZZYYXXWWVVUTTSSRRQQPPOONNMMLLKKJJIIHHGGFFEEDDCCBBAA@??>>==<<;;::99887766554433221100//..--,,++*))((''&&%%$$##""!!                                     !!""##$$%%&&''(()**++,,--..//00112233445566778899::;<<==>>??@@AABBCCDDEEFFGGHHIIJJKKLLMNNOOPPQQRRSSTTUUVVWWXXYYZZ[[\\]]^^_``aabbccddee~f~f}g}g|h|h{i{izjzjykykxlxlwmwmvnvnuouotptpsqsrrrrsqsqtptpuouovnvnwmwlxlxkykyjzjzi{i{h|h|g}g}f~f~eeddccbbaa``__^^]]\\[ZZYYXXWWVVUUTTSSRRQQPPOONNMMLLKKJJIHHGGFFEEDDCCBBAA@@??>>==<<;;::9988766554433221100//..--,,++**))((''&&%%$##""!!                                            !!"##$$%%&&''(())**++,,--..//0011233445566778899::;;<<==>>??@@AABCCDDEEFFGGHHIIJJKKLLMMNNOOPPQQRSSTTUUVVWWXXYYZZ[[\\]]^^__``abbccd~d~e}e}f|f|g{g{hzhziyiyjxjxkwkwlvlvmumuntntososprprqqrqrpspsotntnumumvlvlwkwkxjxjyiyizhzh{g{g|f|f}e}e~d~dccbbaa``_^^]]\\[[ZZYYXXWWVVUUTTSSRRQQPPONNMMLLKKJJIIHHGGFFEEDDCCBBAA@??>>==<<;;::9988776655443322110//..--,,++**))((''&&%%$$##""!!                         
                    !!""##$$%%&&''(())**+,,--..//0011223344556677889::;;<<==>>??@@AABBCCDDEEFFGHHIIJJKKLLMMNNOOPPQQRRSSTTUVVWWXXYYZZ[[\\]]^^__``aab~b~c}d}d|e|e{f{fzgzgyhyhxixiwjwjvkvkulultmtmsnsnroroqppppqorornsnsmtmtlulukvkvjwjwixixhyhygzgzf{f{e|e|d}d}c~b~baa``__^^]]\\[[ZZYYXXWWVVUTTSSRRQQPPOONNMMLLKKJJIIHHGFFEEDDCCBBAA@@??>>==<<;;::9887766554433221100//..--,,+**))((''&&%%$$##""!!                                            !!""##$$%&&''(())**++,,--..//0011233445566778899::;;<<==>??@@AABBCCDDEEFFGGHHIIJJKLLMMNNOOPPQQRRSSTTUUVVWWXYYZZ[[\\]]^^__``~a~a}b}b|c|c{d{ezezfyfygxgxhwhwivivjujuktktlslsmrmqnqnpopoopopnqnrmrmslsltktkujujviviwhwhxgxgyfyezezd{d{c|c|b}b}a~a``__^^]]\\[[ZZYXXWWVVUUTTSSRRQQPPOONNMMLKKJJIIHHGGFFEEDDCCBBAA@??>>==<<;;::9988776655443221100//..--,,++**))((''&%%$$##""!!                                                                                     ! ! " " # # $ $ % % & & ' ' ( ( ) ) * * + + , - - . . / / 0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 8 8 9 9 : : ; ; < < = = > > ? ? @ @ A A B B C D D E E F F G G H H I I J J K K L L M M N O O P P Q Q R R S S T T U U V V W W X X Y Y Z [ [ \ \ ] ] ^ ^ _~ _~ `} `} a| a| b{ b{ cz cz dy dy ex fx fw gw gv hv hu iu it jt js kr kr lq lq mp mp no no on on pm pm ql rl rk sk sj tj ti ui uh vg vg wf wf xe xe yd yd zc zc {b {b |a |a }` ~` ~_ _ ^ ^ ] ] \ [ [ Z Z Y Y X X W W V V U U T T S S R R Q P P O O N N M M L L K K J J I I H H G G F F E D D C C B B A A @ @ ? ? > > = = < < ; ; : 9 9 8 8 7 7 6 6 5 5 4 4 3 3 2 2 1 1 0 0 / / . - - , , + + * * ) ) ( ( ' ' & & % % $ $ # " " ! !                                                         "!!                     
!!""""""""""""""""""" " " " " " " " " " """"""""""""""""""""""""""""""""""" " "!"!"""""#"#"$"$"%"%"&"&"'"("(")")"*"*"+"+",","-"-"."."/"/"0"0"1"1"2"3"3"4"4"5"5"6"6"7"7"8"8"9"9":":";";"<"="=">">"?"?"@"@"A"A"B"B"C"C"D"D"E"E"F"F"G"H"H"I"I"J"J"K"K"L"L"M"M"N"N"O"O"P"P"Q"R"R"S"S"T"T"U"U"V"V"W"W"X"X"Y"Y"Z"Z"["["\"]"]~"^~"^}"_}"_|"`|"`{"a{"az"bz"by"cy"cx"dx"dw"ew"ev"fv"gu"gu"ht"hs"is"ir"jr"jq"kq"kp"lp"lo"mo"mn"nn"nm"om"ol"pl"pk"qk"rj"ri"si"sh"th"tg"ug"uf"vf"ve"we"wd"xd"xc"yc"yb"zb"za"{a"|`"|`"}_"}^"~^"~]"]"\"\"["["Z"Z"Y"Y"X"X"W"W"V"V"U"T"T"S"S"R"R"Q"Q"P"P"O"O"N"N"M"M"L"L"K"K"J"I"I"H"H"G"G"F"F"E"E"D"D"C"C"B"B"A"A"@"?"?">">"="="<"<";";":":"9"9"8"8"7"7"6"6"5"4"4"3"3"2"2"1"1"0"0"/"/"."."-"-",","+"*"*")")"("("'"'"&"&"%"%"$"$"#"#"""""!"!" """""""""""""""""""""""""""""""""""" " " " " " " " " """"""""""""""""""      "###""!!                     !!""##$$$$$$$$$$$$$$$$$$ $ $ $ $ $ $ $ $ $ $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ $ $!$!$"$"$#$$$$$%$%$&$&$'$'$($($)$)$*$*$+$+$,$,$-$.$.$/$/$0$0$1$1$2$2$3$3$4$4$5$5$6$7$7$8$8$9$9$:$:$;$;$<$<$=$=$>$>$?$?$@$A$A$B$B$C$C$D$D$E$E$F$F$G$G$H$H$I$I$J$K$K$L$L$M$M$N$N$O$O$P$P$Q$Q$R$R$S$S$T$U$U$V$V$W$W$X$X$Y$Y$Z$Z$[$[~$\~$\}$]}$^|$^|$_{$_{$`z$`z$ay$ay$bx$bx$cw$cw$dv$dv$eu$et$ft$fs$gs$hr$hr$iq$iq$jp$jp$ko$ko$ln$ln$mm$mm$nl$nk$ok$oj$pj$pi$qi$rh$rh$sg$sg$tf$tf$ue$ue$vd$vd$wc$wc$xb$xa$ya$y`$z`$z_${_$|^$|^$}]$}]$~\$~\$[$[$Z$Z$Y$Y$X$W$W$V$V$U$U$T$T$S$S$R$R$Q$Q$P$P$O$O$N$M$M$L$L$K$K$J$J$I$I$H$H$G$G$F$F$E$D$D$C$C$B$B$A$A$@$@$?$?$>$>$=$=$<$<$;$:$:$9$9$8$8$7$7$6$6$5$5$4$4$3$3$2$2$1$0$0$/$/$.$.$-$-$,$,$+$+$*$*$)$)$($($'$&$&$%$%$$$$$#$#$"$"$!$!$ $ $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ $ $ $ $ $ $ $ $ $$$$$$$$$$$$$$$$$#!    "#%%$$##""!!                      
!!""##$$%%%%%%%%%%%%%%%%%%% % % % % % % % % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % %!%!%"%"%#%#%$%$%%%%%&%&%'%'%(%)%)%*%*%+%+%,%,%-%-%.%.%/%/%0%0%1%1%2%3%3%4%4%5%5%6%6%7%7%8%8%9%9%:%:%;%<%<%=%=%>%>%?%?%@%@%A%A%B%B%C%C%D%E%E%F%F%G%G%H%H%I%I%J%J%K%K%L%L%M%N%N%O%O%P%P%Q%Q%R%R%S%S%T%T%U%U%V%W%W%X%X%Y%Y%Z~%Z~%[}%[}%\|%\|%]{%]{%^z%^z%_y%`y%`x%ax%aw%bw%bv%cu%cu%dt%dt%es%es%fr%fr%gq%gq%hp%ip%io%jo%jn%kn%km%ll%ll%mk%mk%nj%nj%oi%oi%ph%ph%qg%rg%rf%sf%se%te%td%uc%uc%vb%vb%wa%wa%x`%x`%y_%y_%z^%{^%{]%|]%|\%}\%}[%~Z%~Z%Y%Y%X%X%W%W%V%V%U%U%T%T%S%S%R%Q%Q%P%P%O%O%N%N%M%M%L%L%K%K%J%J%I%H%H%G%G%F%F%E%E%D%D%C%C%B%B%A%A%@%?%?%>%>%=%=%<%<%;%;%:%:%9%9%8%8%7%6%6%5%5%4%4%3%3%2%2%1%1%0%0%/%/%.%-%-%,%,%+%+%*%*%)%)%(%(%'%'%&%&%%%$%$%#%#%"%"%!%!% % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % % % % % % % % %%%%%%%%%%%%%%%%%%#!    "#%''&%%$$##""!!                      !!"##$$%%&&''''''''''''''''''' ' ' ' ' ' ' ' ' ''''''''''''''''''''''''''''''''''' ' '!'!'"'"'#'#'$'$'%'&'&'''''('(')')'*'*'+'+',','-'.'.'/'/'0'0'1'1'2'2'3'3'4'4'5'5'6'7'7'8'8'9'9':':';';'<'<'='='>'?'?'@'@'A'A'B'B'C'C'D'D'E'E'F'F'G'H'H'I'I'J'J'K'K'L'L'M'M'N'N'O'P'P'Q'Q'R'R'S'S'T'T'U'U'V'V'W'X'X~'Y~'Y}'Z}'Z|'[|'[{'\{'\z']z']y'^y'^x'_x'_w'`v'av'au'bu'bt'ct'cs'ds'dr'er'eq'fq'fp'gp'go'ho'in'im'jm'jl'kl'kk'lk'lj'mj'mi'ni'nh'oh'og'pg'pf'qe're'rd'sd'sc'tc'tb'ub'ua'va'v`'w`'w_'x_'x^'y^'z]'z\'{\'{['|['|Z'}Z'}Y'~Y'~X'X'W'W'V'V'U'T'T'S'S'R'R'Q'Q'P'P'O'O'N'N'M'M'L'K'K'J'J'I'I'H'H'G'G'F'F'E'E'D'C'C'B'B'A'A'@'@'?'?'>'>'='='<'<';':':'9'9'8'8'7'7'6'6'5'5'4'4'3'2'2'1'1'0'0'/'/'.'.'-'-',','+'*'*')')'('('''''&'&'%'%'$'$'#'#'"'!'!' ' ''''''''''''''''''''''''''''''''''' ' ' ' ' ' ' ' ' ''''''''''''''''''%"      "#%')((''&&%$$##""!!                     
!!""##$$%%&&''())))))))))))))))))) ) ) ) ) ) ) ) ) ))))))))))))))))))))))))))))))))))) ) )!)!)")#)#)$)$)%)%)&)&)')')()()))))*)+)+),),)-)-).).)/)/)0)0)1)2)2)3)3)4)4)5)5)6)6)7)7)8)8)9):):););)<)<)=)=)>)>)?)?)@)@)A)B)B)C)C)D)D)E)E)F)F)G)G)H)H)I)J)J)K)K)L)L)M)M)N)N)O)O)P)P)Q)R)R)S)S)T)T)U)U)V)V~)W~)W})X})X|)Y|)Z{)Z{)[z)[z)\y)\y)]x)]x)^w)^v)_v)_u)`u)`t)at)bs)bs)cr)cr)dq)dq)ep)ep)fo)fn)gn)gm)hm)hl)il)jk)jk)kj)kj)li)li)mh)mh)ng)nf)of)oe)pe)pd)qd)rc)rc)sb)sb)ta)ta)u`)u`)v_)v^)w^)w])x])x\)y\)z[)z[){Z){Z)|Y)|Y)}X)}X)~W)~V)V)U)U)T)T)S)S)R)R)Q)Q)P)O)O)N)N)M)M)L)L)K)K)J)J)I)I)H)G)G)F)F)E)E)D)D)C)C)B)B)A)A)@)?)?)>)>)=)=)<)<););):):)9)9)8)7)7)6)6)5)5)4)4)3)3)2)2)1)1)0)/)/).).)-)-),),)+)+)*)*)))))()')')&)&)%)%)$)$)#)#)")")!)!) ))))))))))))))))))))))))))))))))))) ) ) ) ) ) ) ) ) ) ))))))))))))))))(&$"      "#%')**))((''&&%%$$#""! !             ! !""##$$%%&''(())******************* * * * * * * * * * ********************************** * *!*!*"*"*#*#*$*$*%*%*&*'*'*(*(*)*)*****+*+*,*,*-*-*.*/*/*0*0*1*1*2*2*3*3*4*4*5*6*6*7*7*8*8*9*9*:*:*;*;*<*<*=*>*>*?*?*@*@*A*A*B*B*C*C*D*E*E*F*F*G*G*H*H*I*I*J*J*K*K*L*M*M*N*N*O*O*P*P*Q*Q*R*R*S*T*T*U~*U~*V}*V}*W|*W|*X{*X{*Yz*Yz*Zy*[y*[x*\w*\w*]v*]v*^u*^u*_t*_t*`s*`s*ar*ar*bq*cq*cp*do*do*en*en*fm*fm*gl*gl*hk*hk*ij*jj*ji*kh*kh*lg*lg*mf*mf*ne*ne*od*od*pc*pc*qb*rb*ra*s`*s`*t_*t_*u^*u^*v]*v]*w\*w\*x[*y[*yZ*zY*zY*{X*{X*|W*|W*}V*}V*~U*~U*T*T*S*S*R*Q*Q*P*P*O*O*N*N*M*M*L*L*K*J*J*I*I*H*H*G*G*F*F*E*E*D*D*C*B*B*A*A*@*@*?*?*>*>*=*=*<*;*;*:*:*9*9*8*8*7*7*6*6*5*4*4*3*3*2*2*1*1*0*0*/*/*.*.*-*,*,*+*+*****)*)*(*(*'*'*&*%*%*$*$*#*#*"*"*!*!* * ********************************** * * * * * * * * * *****************(&#!     "#%'(*,,++**)((''&&%%$$# # " ! !       ! ! 
" " ##$%%&&''(())**+,,,,,,,,,,,,,,,,,,, , , , , , , , , , ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, , ,!,!,",",#,$,$,%,%,&,&,',',(,(,),),*,+,+,,,,,-,-,.,.,/,/,0,0,1,2,2,3,3,4,4,5,5,6,6,7,7,8,9,9,:,:,;,;,<,<,=,=,>,>,?,@,@,A,A,B,B,C,C,D,D,E,E,F,F,G,H,H,I,I,J,J,K,K,L,L,M,M,N,O,O,P,P,Q,Q,R,R,S~,S~,T},T},U|,V|,V{,W{,Wz,Xz,Xy,Yy,Yx,Zw,Zw,[v,[v,\u,]u,]t,^t,^s,_s,_r,`r,`q,ap,ap,bo,bo,cn,dn,dm,em,el,fl,fk,gk,gj,hi,hi,ih,ih,jg,kg,kf,lf,le,me,md,nd,nc,ob,ob,pa,pa,q`,r`,r_,s_,s^,t^,t],u],u\,v[,v[,wZ,wZ,xY,yY,yX,zX,zW,{W,{V,|V,|U,}T,}T,~S,~S,R,R,Q,Q,P,P,O,O,N,M,M,L,L,K,K,J,J,I,I,H,H,G,F,F,E,E,D,D,C,C,B,B,A,A,@,?,?,>,>,=,=,<,<,;,;,:,:,9,8,8,7,7,6,6,5,5,4,4,3,3,2,1,1,0,0,/,/,.,.,-,-,,,,,+,*,*,),),(,(,',',&,&,%,%,$,#,#,",",!,!, , ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, , , , , , , , , , ,,,,,,,,,,,,,,,,,*'%#!      "#%'(*,.--,,++**))(''&&%% $ $ # # " " !  ! ! " # # $ $ %%&&''())**++,,--.................. . . . . . . . . ................................... . .!.".".#.#.$.$.%.%.&.&.'.(.(.).).*.*.+.+.,.,.-.-..././.0.0.1.1.2.2.3.3.4.4.5.6.6.7.7.8.8.9.9.:.:.;.<.<.=.=.>.>.?.?.@.@.A.A.B.C.C.D.D.E.E.F.F.G.G.H.H.I.J.J.K.K.L.L.M.M.N.N.O.P.P.Q.Q~.R~.R}.S}.S|.T|.T{.U{.Uz.Vz.Wy.Wy.Xx.Xw.Yw.Yv.Zv.Zu.[u.[t.\t.\s.]s.^r.^q._q._p.`p.`o.ao.an.bn.bm.cm.dl.dl.ek.ej.fj.fi.gi.gh.hh.hg.ig.if.jf.ke.ke.ld.lc.mc.mb.nb.na.oa.o`.p`.p_.q_.r^.r].s].s\.t\.t[.u[.uZ.vZ.vY.wY.xX.xX.yW.yV.zV.zU.{U.{T.|T.|S.}S.}R.~R.Q.Q.P.O.O.N.N.M.M.L.L.K.K.J.I.I.H.H.G.G.F.F.E.E.D.D.C.B.B.A.A.@.@.?.?.>.>.=.=.<.;.;.:.:.9.9.8.8.7.7.6.5.5.4.4.3.3.2.2.1.1.0.0./.....-.-.,.,.+.+.*.*.).).(.'.'.&.&.%.%.$.$.#.#.".!.!. . .................................. . . . . . . . . . .................+)'%"          "#%'(*,.///..-,,++**))(('' & % % $ $ # # " "!!  !!" 
" # # $ $ % % & ''(())**++,,-..//000000000000000000 0 0 0 0 0 0 0 0 0000000000000000000000000000000000 0 0!0!0"0"0#0#0$0$0%0&0&0'0'0(0(0)0)0*0*0+0,0,0-0-0.0.0/0/0000010202030304040505060607070809090:0:0;0;0<0<0=0=0>0?0?0@0@0A0A0B0B0C0C0D0E0E0F0F0G0G0H0H0I0I0J0J0K0L0L0M0M0N0N0O0O~0P~0P}0Q}0R|0R|0S{0S{0Tz0Tz0Uy0Ux0Vx0Vw0Ww0Xv0Xv0Yu0Yu0Zt0Zt0[s0[s0\r0\q0]q0]p0^p0_o0_o0`n0`n0am0am0bl0bk0ck0cj0dj0ei0ei0fh0fh0gg0gg0hf0he0ie0id0jd0kc0kc0lb0lb0ma0ma0n`0n`0o_0o^0p^0p]0q]0r\0r\0s[0s[0tZ0tZ0uY0uX0vX0vW0wW0xV0xV0yU0yU0zT0zT0{S0{R0|R0|Q0}Q0~P0~P0O0O0N0N0M0M0L0K0K0J0J0I0I0H0H0G0G0F0E0E0D0D0C0C0B0B0A0A0@0?0?0>0>0=0=0<0<0;0;0:0:0908080707060605050404030202010100000/0/0.0.0-0,0,0+0+0*0*0)0)0(0(0'0'0&0%0%0$0$0#0#0"0"0!0!0 00000000000000000000000000000000000 0 0 0 0 0 0 0 0 0000000000000000/-+)&$"           "#%'(*,./1100//..--,,+**))( ( ' ' & & % $ $ ##""!!    !!""## $ $ % & & ' ' ( ())**+,,--..//001111111111111111111 1 1 1 1 1 1 1 1 1111111111111111111111111111111111 1 1!1!1"1#1#1$1$1%1%1&1&1'1'1(1)1)1*1*1+1+1,1,1-1-1.1/1/101011111212131314151516161717181819191:1;1;1<1<1=1=1>1>1?1?1@1A1A1B1B1C1C1D1D1E1E1F1G1G1H1H1I1I1J1J1K1K1L1L1M1N~1N~1O}1O}1P|1P|1Q{1Q{1Rz1Rz1Sy1Tx1Tx1Uw1Uw1Vv1Vv1Wu1Wu1Xt1Xt1Ys1Zr1Zr1[q1[q1\p1\p1]o1]o1^n1^n1_m1`l1`l1ak1ak1bj1bj1ci1ci1dh1dh1eg1ff1ff1ge1ge1hd1hd1ic1ic1jb1jb1ka1l`1l`1m_1m_1n^1n^1o]1o]1p\1p\1q[1rZ1rZ1sY1sY1tX1tX1uW1uW1vV1vV1wU1xT1xT1yS1yS1zR1zR1{Q1{Q1|P1|P1}O1~N1~N1M1M1L1L1K1K1J1J1I1H1H1G1G1F1F1E1E1D1D1C1B1B1A1A1@1@1?1?1>1>1=1<1<1;1;1:1:1919181817161615151414131312121110101/1/1.1.1-1-1,1,1+1*1*1)1)1(1(1'1'1&1&1%1$1$1#1#1"1"1!1!1 1 1111111111111111111111111111111111 1 1 1 1 1 1 1 1 11111111111111111/-*(&$!           "#%'(*,./133221100/..--,,++* ) ) ( ( ' ' & & %%$##""!!    
!!""#$$%% & & ' ' ( ( ) * *++,,--.//0011223333333333333333333 3 3 3 3 3 3 3 3 3333333333333333333333333333333333 3!3!3"3"3#3#3$3$3%3%3&3'3'3(3(3)3)3*3*3+3+3,3-3-3.3.3/3/3030313232333334343535363637383839393:3:3;3;3<3<3=3>3>3?3?3@3@3A3A3B3B3C3D3D3E3E3F3F3G3G3H3I3I3J3J3K3K3L~3L~3M}3M}3N|3O|3O{3P{3Pz3Qz3Qy3Rx3Rx3Sw3Sw3Tv3Uv3Uu3Vu3Vt3Wt3Ws3Xr3Xr3Yq3Yq3Zp3[p3[o3\o3\n3]m3]m3^l3^l3_k3`k3`j3aj3ai3bi3bh3cg3cg3df3df3ee3fe3fd3gd3gc3hc3hb3ia3ia3j`3j`3k_3l_3l^3m^3m]3n]3n\3o[3o[3pZ3pZ3qY3rY3rX3sX3sW3tV3tV3uU3uU3vT3wT3wS3xS3xR3yR3yQ3zP3zP3{O3{O3|N3}N3}M3~M3~L3L3K3J3J3I3I3H3H3G3G3F3F3E3D3D3C3C3B3B3A3A3@3?3?3>3>3=3=3<3<3;3;3:393938383737363635353433333232313130303/3/3.3-3-3,3,3+3+3*3*3)3(3(3'3'3&3&3%3%3$3$3#3"3"3!3!3 3 3333333333333333333333333333333333 3 3 3 3 3 3 3 3 333333333333333331.,*'%#!           "#%'(*,./1355443221100//.--,, + + * * ) ) ( ' '&&%%$$#""!!    !""##$$%%&&' ( ( ) ) * * + + ,--..//001123344555555555555555555 5 5 5 5 5 5 5 5 5 555555555555555555555555555555555 5 5!5!5"5"5#5#5$5%5%5&5&5'5'5(5(5)5*5*5+5+5,5,5-5-5.5.5/505051515252535354555556565757585859595:5;5;5<5<5=5=5>5>5?5@5@5A5A5B5B5C5C5D5D5E5F5F5G5G5H5H5I5I5J~5K~5K}5L}5L|5M|5M{5N{5Nz5Oz5Oy5Px5Qx5Qw5Rw5Rv5Sv5Su5Tu5Tt5Us5Vs5Vr5Wr5Wq5Xq5Xp5Yp5Yo5Zo5Zn5[m5\m5\l5]l5]k5^k5^j5_j5_i5`h5ah5ag5bg5bf5cf5ce5de5dd5ed5ec5fb5gb5ga5ha5h`5i`5i_5j_5j^5k]5l]5l\5m\5m[5n[5nZ5oZ5oY5pY5pX5qW5rW5rV5sV5sU5tU5tT5uT5uS5vR5wR5wQ5xQ5xP5yP5yO5zO5zN5{N5{M5|L5}L5}K5~K5~J5J5I5I5H5G5G5F5F5E5E5D5D5C5C5B5A5A5@5@5?5?5>5>5=5<5<5;5;5:5:5959585857565655555454535352515150505/5/5.5.5-5-5,5+5+5*5*5)5)5(5(5'5&5&5%5%5$5$5#5#5"5"5!5 5 5555555555555555555555555555555555 5 5 5 5 5 5 5 5 5555555555555555520.+)'%"             "#%'(*,./13576655443321100//.. - , , + + * * ) )(('&&%%$$##"!!   
!!""##$$%&&''(() ) * * + , , - - ..//011223344566777777777777777777 7 7 7 7 7 7 7 7 7777777777777777777777777777777777 7 7!7!7"7#7#7$7$7%7%7&7&7'7(7(7)7)7*7*7+7+7,7-7-7.7.7/7/7070717272737374747575767677787879797:7:7;7;7<7=7=7>7>7?7?7@7@7A7B7B7C7C7D7D7E7E7F7G7G7H7H~7I~7I}7J}7J|7K|7K{7L{7Mz7My7Ny7Nx7Ox7Ow7Pw7Pv7Qv7Ru7Ru7St7Ss7Ts7Tr7Ur7Uq7Vq7Wp7Wp7Xo7Xn7Yn7Ym7Zm7Zl7[l7[k7\k7]j7]i7^i7^h7_h7_g7`g7`f7af7be7bd7cd7cc7dc7db7eb7ea7fa7g`7g`7h_7h^7i^7i]7j]7j\7k\7l[7l[7mZ7mY7nY7nX7oX7oW7pW7pV7qV7rU7rT7sT7sS7tS7tR7uR7uQ7vQ7wP7wO7xO7xN7yN7yM7zM7zL7{L7|K7|K7}J7}I7~I7~H7H7G7G7F7F7E7D7D7C7C7B7B7A7A7@7?7?7>7>7=7=7<7<7;7;7:7979787877777676757474737372727171707/7/7.7.7-7-7,7,7+7*7*7)7)7(7(7'7'7&7&7%7$7$7#7#7"7"7!7!7 7777777777777777777777777777777777 7 7 7 7 7 7 7 7 7777777777777777642/-+)&$"              "#%'(*,./135788877665443322110/ / . . - - , , + **))((''&%%$$##""!    !!""##$%%&&''(()**+ + , , - - . / / 0011223445566778999999999999999999 9 9 9 9 9 9 9 9 9999999999999999999999999999999999 9!9!9"9"9#9#9$9$9%9&9&9'9'9(9(9)9)9*9+9+9,9,9-9-9.9.9/9090919192929393949595969697979898999:9:9;9;9<9<9=9=9>9?9?9@9@9A9A9B9B9C9D9D9E9E9F9F~9G~9G}9H}9I|9I|9J{9J{9Kz9Ky9Ly9Lx9Mx9Nw9Nw9Ov9Ov9Pu9Pt9Qt9Qs9Rs9Sr9Sr9Tq9Tq9Up9Uo9Vo9Vn9Wn9Xm9Xm9Yl9Yl9Zk9Zj9[j9[i9\i9]h9]h9^g9^g9_f9_e9`e9`d9ad9bc9bc9cb9cb9da9d`9e`9e_9f_9g^9g^9h]9h]9i\9i[9j[9jZ9kZ9kY9lY9mX9mX9nW9nW9oV9oU9pU9pT9qT9rS9rS9sR9sR9tQ9tP9uP9uO9vO9wN9wN9xM9xM9yL9yK9zK9zJ9{J9|I9|I9}H9}H9~G9~F9F9E9E9D9D9C9C9B9A9A9@9@9?9?9>9>9=9<9<9;9;9:9:9999989797969695959494939292919190909/9/9.9-9-9,9,9+9+9*9*9)9(9(9'9'9&9&9%9%9$9#9#9"9"9!9!9 9 999999999999999999999999999999999 9 9 9 9 9 9 9 9 99999999999999998641/-*(&$!                 "#%'(*,./13578:::988776655433221 1 0 0 / . . - - ,,++*))((''&&%$$##""!!   !!""#$$%%&&'(())**++,- - . . 
/ / 0 0 1 223344556778899::;;;;;;;;;;;;;;;;; ; ; ; ; ; ; ; ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ;!;!;";";#;$;$;%;%;&;&;';';(;););*;*;+;+;,;-;-;.;.;/;/;0;0;1;2;2;3;3;4;4;5;5;6;7;7;8;8;9;9;:;:;;;<;<;=;=;>;>;?;?;@;A;A;B;B;C;C;D;D~;E~;F};F};G|;G|;H{;H{;Iz;Iy;Jy;Kx;Kx;Lw;Lw;Mv;Mv;Nu;Nt;Ot;Ps;Ps;Qr;Qr;Rq;Rq;Sp;To;To;Un;Un;Vm;Vm;Wl;Wk;Xk;Yj;Yj;Zi;Zi;[h;[h;\g;\f;]f;^e;^e;_d;_d;`c;`c;ab;aa;ba;c`;c`;d_;d_;e^;e^;f];f\;g\;h[;h[;iZ;iZ;jY;jY;kX;kW;lW;mV;mV;nU;nU;oT;oT;pS;pR;qR;rQ;rQ;sP;sP;tO;tO;uN;vM;vM;wL;wL;xK;xK;yJ;yJ;zI;{H;{H;|G;|G;}F;}F;~E;~D;D;C;C;B;B;A;A;@;?;?;>;>;=;=;<;<;;;:;:;9;9;8;8;7;7;6;5;5;4;4;3;3;2;2;1;0;0;/;/;.;.;-;-;,;+;+;*;*;););(;(;';&;&;%;%;$;$;#;";";!;!; ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; ; ; ; ; ; ; ;;;;;;;;;;;;;;;;:8531.,*(%#!                  "#%'(*,./13578:<<;;::998876655443 3 2 1 1 0 0 / / .--,,++**)((''&&%%$##""!!   !""##$$%%&''(())**+,,--.. / 0 0 1 1 2 2 3 34556677889::;;<<<<<<<<<<<<<<<<<<< < < < < < < < < <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< < <><><=<<<<<;<;<:<:<9<8<8<7<7<6<6<5<5<4<3<3<2<2<1<1<0<0>==<<;;:998877655 4 4 3 3 2 2 1 0 0//..-,,++**))(''&&%%$##""!!   !!""##$$%&&''(()**++,,--.//00 1 1 2 3 3 4 4 5 56678899::;<<==>>>>>>>>>>>>>>>>>>> > > > > > > > >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> > >!>!>">">#>#>$>%>%>&>&>'>'>(>)>)>*>*>+>+>,>,>->.>.>/>/>0>0>1>2>2>3>3>4>4>5>5>6>7>7>8>8>9>9>:>;>;><><>=>=>>>>>?>@>@>A~>A~>B}>B}>C|>D|>D{>E{>Ez>Fy>Fy>Gx>Gx>Hw>Iw>Iv>Ju>Ju>Kt>Kt>Ls>Ms>Mr>Nr>Nq>Op>Op>Po>Po>Qn>Rn>Rm>Sl>Sl>Tk>Tk>Uj>Vj>Vi>Wi>Wh>Xg>Xg>Yf>Yf>Ze>[e>[d>\c>\c>]b>]b>^a>^a>_`>``>`_>a^>a^>b]>b]>c\>d\>d[>eZ>eZ>fY>fY>gX>gX>hW>iW>iV>jU>jU>kT>kT>lS>mS>mR>nQ>nQ>oP>oP>pO>pO>qN>rN>rM>sL>sL>tK>tK>uJ>vJ>vI>wH>wH>xG>xG>yF>yF>zE>{E>{D>|C>|C>}B>}B>~A>A>@>?>?>>>>>=>=><><>;>:>:>9>9>8>8>7>6>6>5>5>4>4>3>3>2>1>1>0>0>/>/>.>->->,>,>+>+>*>*>)>(>(>'>'>&>&>%>$>$>#>#>">">!>!> >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> > > > > > > > > >>>>>>>>>>>>>>>>>;97420-+)&$"                   " #%'(*,./13578:<>@@??>>=<<;;::98877 6 6 5 5 4 3 3 2 2110//..--,++**))(('&&%%$$#""!! 
!!""#$$%%&&''())**++,--..//01122 3 3 4 4 5 6 6 77889::;;<<==>??@@@@@@@@@@@@@@@@@@ @ @ @ @ @ @ @ @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @ @!@!@"@#@#@$@$@%@%@&@'@'@(@(@)@)@*@*@+@,@,@-@-@.@.@/@0@0@1@1@2@2@3@4@4@5@5@6@6@7@7@8@9@9@:@:@;@;@<@=@=@>@>@?~@?~@@}@@}@A|@B|@B{@Cz@Cz@Dy@Dy@Ex@Fx@Fw@Gw@Gv@Hu@Hu@It@Jt@Js@Ks@Kr@Lq@Lq@Mp@Mp@No@Oo@On@Pm@Pm@Ql@Ql@Rk@Sk@Sj@Tj@Ti@Uh@Uh@Vg@Wg@Wf@Xf@Xe@Yd@Yd@Zc@Zc@[b@\b@\a@]a@]`@^_@^_@_^@`^@`]@a]@a\@b[@b[@cZ@dZ@dY@eY@eX@fW@fW@gV@gV@hU@iU@iT@jT@jS@kR@kR@lQ@mQ@mP@nP@nO@oN@oN@pM@pM@qL@rL@rK@sJ@sJ@tI@tI@uH@vH@vG@wG@wF@xE@xE@yD@zD@zC@{C@{B@|A@|A@}@@}@@~?@?@>@=@=@<@<@;@;@:@:@9@8@8@7@7@6@6@5@4@4@3@3@2@2@1@1@0@/@/@.@.@-@-@,@+@+@*@*@)@)@(@'@'@&@&@%@%@$@$@#@"@"@!@!@ @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @ @ @ @ @ @ @ @ @@@@@@@@@@@@@@@@@=;8641/-*(&$!                  " # %'(*,./13578:<>@BBAA@??>>==<<;::99 8 8 7 6 6 5 5 443221100/..--,,+**))((''&%%$$##" ! !! ! "##$$%%&''(())**+,,--../001122344 5 5 6 6 7 8 8 99::;<<==>>??@AABBBBBBBBBBBBBBBBBB B B B B B B B B BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB B!B!B"B"B#B#B$B$B%B&B&B'B'B(B(B)B*B*B+B+B,B,B-B.B.B/B/B0B0B1B2B2B3B3B4B4B5B5B6B7B7B8B8B9B9B:B;B;B}B?}B?|B@|B@{BAzBAzBByBCyBCxBDxBDwBEvBEvBFuBGuBGtBHtBHsBIsBIrBJqBJqBKpBLpBLoBMoBMnBNmBNmBOlBPlBPkBQkBQjBRiBRiBShBThBTgBUgBUfBVeBVeBWdBXdBXcBYcBYbBZbBZaB[`B\`B\_B]_B]^B^^B^]B_\B_\B`[Ba[BaZBbZBbYBcXBcXBdWBeWBeVBfVBfUBgTBgTBhSBiSBiRBjRBjQBkPBkPBlOBmOBmNBnNBnMBoMBoLBpKBpKBqJBrJBrIBsIBsHBtGBtGBuFBvFBvEBwEBwDBxCBxCByBBzBBzAB{AB{@B|?B|?B}>B~>B~=B=B@BDDCCBAA@@??>==<<;; : 9 9 8 8 7 7 655443321100//.--,,++*))((''&&%$$ # #!""""!# # $$%&&''(()**++,,-..//001223344556 7 7 8 8 9 9 : ;;<<==>??@@AABCCDDDDDDDDDDDDDDDDDD D D D D D D D D DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD D 
D!D!D"D"D#D$D$D%D%D&D&D'D(D(D)D)D*D*D+D,D,D-D-D.D.D/D0D0D1D1D2D2D3D4D4D5D5D6D6D7D8D8D9D9D:D:D;~D<~D<}D=}D=|D>|D>{D?zD?zD@yDAyDAxDBxDBwDCvDCvDDuDEuDEtDFtDFsDGrDGrDHqDIqDIpDJpDJoDKnDKnDLmDMmDMlDNlDNkDOjDOjDPiDQiDQhDRhDRgDSfDSfDTeDUeDUdDVdDVcDWcDWbDXaDYaDY`DZ`DZ_D[_D[^D\]D]]D]\D^\D^[D_[D_ZD`YDaYDaXDbXDbWDcWDcVDdUDeUDeTDfTDfSDgSDgRDhQDiQDiPDjPDjODkODkNDlMDmMDmLDnLDnKDoKDoJDpIDpIDqHDrHDrGDsGDsFDtEDtEDuDDvDDvCDwCDwBDxADxADy@Dz@Dz?D{?D{>D|=D|=D}@BDFFEEDCCBBAA@??>>= = < ; ; : : 9 8 877665443322100//..-,,++**)((''&& %!$!$"#"##"#"$!% % &&''())**++,--..//011223345566778 9 9 : : ; ; < ==>>??@AABBCCDEEFFFFFFFFFFFFFFFFFF F F F F F F F F FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF F F!F"F"F#F#F$F$F%F&F&F'F'F(F(F)F*F*F+F+F,F,F-F.F.F/F/F0F0F1F2F2F3F3F4F4F5F6F6F7F7F8F8F9~F:~F:}F;}F;|F<|F<{F=zF>zF>yF?yF?xF@xF@wFAvFBvFBuFCuFCtFDtFDsFErFFrFFqFGqFGpFHpFHoFInFJnFJmFKmFKlFLlFLkFMjFNjFNiFOiFOhFPhFPgFQfFRfFReFSeFSdFTdFTcFUbFVbFVaFWaFW`FX`FX_FY^FZ^FZ]F[]F[\F\\F\[F]ZF^ZF^YF_YF_XF`XF`WFaVFbVFbUFcUFcTFdTFdSFeRFfRFfQFgQFgPFhOFhOFiNFjNFjMFkMFkLFlKFlKFmJFnJFnIFoIFoHFpGFpGFqFFrFFrEFsEFsDFtCFtCFuBFvBFvAFwAFw@Fx?Fy?Fy>Fz>Fz=F{=F{<97420-+)&$"              " # % ' ( *,./13568:<>@BDFHHGFFEEDDCBBAA@@? > > = = < < ; : :99887665544322110//..--,++**))( ' '!&!&"%"%#$$#$#%"%"&!&!' (())**+,,--.//001123344556778899: ; ; < < = = > ??@@AABCCDDEFFGGHHHHHHHHHHHHHHHHHH H H H H H H H HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH H!H!H"H"H#H#H$H%H%H&H&H'H'H(H)H)H*H*H+H+H,H-H-H.H.H/H0H0H1H1H2H2H3H4H4H5H5H6H6H7~H8~H8}H9}H9|H:|H:{H;zHxH>wH?vH@vH@uHAuHAtHBtHBsHCrHDrHDqHEqHEpHFoHGoHGnHHnHHmHImHIlHJkHKkHKjHLjHLiHMiHMhHNgHOgHOfHPfHPeHQeHQdHRcHScHSbHTbHTaHUaHU`HV_HW_HW^HX^HX]HY]HY\HZ[H[[H[ZH\ZH\YH]XH^XH^WH_WH_VH`VH`UHaTHbTHbSHcSHcRHdRHdQHePHfPHfOHgOHgNHhNHhMHiLHjLHjKHkKHkJHlJHlIHmHHnHHnGHoGHoFHpFHpEHqDHrDHrCHsCHsBHtAHuAHu@Hv@Hv?Hw?Hw>Hx=Hy=Hy@BDFHJJIHHGGFFEDDCCBBA @ @ ? ? > > = < <;;:998877655443321100/..--,,+** ) )!(!("'#&#&$%$%%$%$&#'"'"(!(!) **++,,-..//00122334456677899::;;< = = > > ? ? 
@ AABBCDDEEFFGHHIIJJJJJJJJJJJJJJJJJ J J J J J J J J JJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJ J J!J!J"J#J#J$J$J%J%J&J'J'J(J(J)J)J*J+J+J,J,J-J-J.J/J/J0J0J1J2J2J3J3J4J4J5~J6~J6}J7}J7|J8|J8{J9zJ:zJ:yJ;yJ;xJvJ>uJ?uJ?tJ@sJAsJArJBrJBqJCqJCpJDoJEoJEnJFnJFmJGmJGlJHkJIkJIjJJjJJiJKhJLhJLgJMgJMfJNfJNeJOdJPdJPcJQcJQbJRbJRaJS`JT`JT_JU_JU^JV^JW]JW\JX\JX[JY[JYZJZYJ[YJ[XJ\XJ\WJ]WJ]VJ^UJ_UJ_TJ`TJ`SJaSJaRJbQJcQJcPJdPJdOJeNJfNJfMJgMJgLJhLJhKJiJJjJJjIJkIJkHJlHJlGJmFJnFJnEJoEJoDJpDJpCJqBJrBJrAJsAJs@Jt?Ju?Ju>Jv>Jv=Jw=Jw@BDFHJLLKKJIIHHGFFEEDDC B B A A @ ? ? > >==<;;::99877665443322100//.--,, + +!*")")#(#($'$'%&&%&%'$'$(#)")"*!*!+ + ,--../001122344556678899:;;<<==> ? ? @ @ A A B CCDDEFFGGHHIJJKKLLLLLLLLLLLLLLLLL L L L L L L L L LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL L L!L"L"L#L#L$L$L%L&L&L'L'L(L)L)L*L*L+L+L,L-L-L.L.L/L0L0L1L1L2L2L3~L4~L4}L5}L5|L6|L6{L7zL8zL8yL9yL9xL:wL;wL;vLsL?sL?rL@rL@qLApLApLBoLCoLCnLDnLDmLElLFlLFkLGkLGjLHjLHiLIhLJhLJgLKgLKfLLeLMeLMdLNdLNcLOcLObLPaLQaLQ`LR`LR_LS_LS^LT]LU]LU\LV\LV[LWZLXZLXYLYYLYXLZXLZWL[VL\VL\UL]UL]TL^SL_SL_RL`RL`QLaQLaPLbOLcOLcNLdNLdMLeMLeLLfKLgKLgJLhJLhILiHLjHLjGLkGLkFLlFLlELmDLnDLnCLoCLoBLpALpALq@Lr@Lr?Ls?Ls>Lt=Lu=Lu@BDFHJLNNMMLKKJJIHHGGFFE D D C C B A A @ @??>==<<;::99887665543322110//.. 
-!,!,"+"+#*#*$)%(%(&'&''&(%(%)$)$*#*#+",!,!- - .//00112334456677889::;;<==>>??@ A A B B C D D EEFFGHHIIJKKLLMMNNNNNNNNNNNNNNNNN N N N N N N N N NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN N!N!N"N"N#N$N$N%N%N&N&N'N(N(N)N)N*N+N+N,N,N-N-N.N/N/N0N0N1~N2~N2}N3}N3|N4|N4{N5zN6zN6yN7yN7xN8wN9wN9vN:vN:uN;uN;tNrN>qN?pN@pN@oNAoNAnNBnNBmNClNDlNDkNEkNEjNFiNGiNGhNHhNHgNIgNIfNJeNKeNKdNLdNLcNMbNNbNNaNOaNO`NP`NP_NQ^NR^NR]NS]NS\NT[NU[NUZNVZNVYNWYNWXNXWNYWNYVNZVNZUN[TN\TN\SN]SN]RN^RN^QN_PN`PN`ONaONaNNbMNcMNcLNdLNdKNeKNeJNfINgINgHNhHNhGNiFNiFNjENkENkDNlDNlCNmBNnBNnANoANo@Np?Np?Nq>Nr>Nr=Ns=Ns<97420-+)'$ "             " # % ' ( * , - / 13568:<>@BDFHJLNPPOONMMLLKJJIIHHG F F E E D C C B BAA@??>>=<<;;::9887765544322110 0 /!.!."-"-#,$+$+%*%*&)')'(('(')&*&*%+$+$,#,#-".!.!/ / 01122334556678899::;<<==>??@@ABB C C D D E F F GGHIIJJKKLMMNNOPPPPPPPPPPPPPPPPPP P P P P P P P PPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP P P!P!P"P#P#P$P$P%P&P&P'P'P(P(P)P*P*P+P+P,P-P-P.P.P/~P/~P0}P1}P1|P2{P2{P3zP4zP4yP5yP5xP6wP7wP7vP8vP8uP9tP9tP:sP;sP;rPpP>oP?oP?nP@mP@mPAlPBlPBkPCkPCjPDiPEiPEhPFhPFgPGfPHfPHePIePIdPJcPJcPKbPLbPLaPMaPM`PN_PO_PO^PP^PP]PQ\PQ\PR[PS[PSZPTZPTYPUXPVXPVWPWWPWVPXUPXUPYTPZTPZSP[RP[RP\QP]QP]PP^PP^OP_NP`NP`MPaMPaLPbKPbKPcJPdJPdIPeIPeHPfGPgGPgFPhFPhEPiDPiDPjCPkCPkBPlBPlAPm@Pn@Pn?Po?Po>Pp=Pp=Pq;9642/-+(& $ "            " # % ' ( * , - / 1 3 568:<>@BDFHJLNPRRQQPOONNMMLKKJJI H H G G F E E D DCCBAA@@?>>==<;;::9987766544332 1 1!0!0"/#/#.$-$-%,&,&+'*'*())))(*'*'+&+&,%-%-$.#.#/"0"0!1 1 2334455677889::;;<==>>??@AABBCDD E E F G G H H IIJKKLLMNNOOPQQRRRRRRRRRRRRRRRRRR R R R R R R R RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR R R!R"R"R#R#R$R%R%R&R&R'R(R(R)R)R*R*R+R,R,R-~R-~R.}R/}R/|R0{R0{R1zR2zR2yR3yR3xR4wR4wR5vR6vR6uR7tR7tR8sR9sR9rR:qR:qR;pRmR>mR?lR@lR@kRAjRAjRBiRCiRChRDgRDgREfRFfRFeRGeRGdRHcRHcRIbRJbRJaRK`RK`RL_RM_RM^RN]RN]RO\RP\RP[RQ[RQZRRYRRYRSXRTXRTWRUVRUVRVURWURWTRXSRXSRYRRZRRZQR[QR[PR\OR\OR]NR^NR^MR_LR_LR`KRaKRaJRbIRbIRcHRdHRdGReGReFRfERfERgDRhDRhCRiBRiBRjARkARk@Rl?Rl?Rm>Rn>Rn=Ro=Ro@BDFHJLNPRTTSSRRQPPOONMMLLK J J I I H H G 
FFEEDCCBBA@@??>==<<;;:9988766554 3 3!2"2"1#0#0$/%/%.&.&-',(,(+)+)**)*)+(,(,'-&-&.%/%/$0#0#1"2"2!3!3 4556677899::;<<==>??@@ABBCCDDEFF G G H I I J J KLLMMNOOPPQQRSSTTTTTTTTTTTTTTTTTT T T T T T T T TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT T T!T!T"T"T#T$T$T%T%T&T'T'T(T(T)T*T*T+~T+~T,}T-}T-|T.{T.{T/zT/zT0yT1xT1xT2wT2wT3vT4vT4uT5tT5tT6sT7sT7rT8qT8qT9pT:pT:oT;nT;nTkT>kT?jT?jT@iTAiTAhTBgTBgTCfTDfTDeTEdTEdTFcTGcTGbTHaTHaTI`TI`TJ_TK^TK^TL]TL]TM\TN\TN[TOZTOZTPYTQYTQXTRWTRWTSVTTVTTUTUTTUTTVSTVSTWRTXQTXQTYPTYPTZOT[OT[NT\MT\MT]LT^LT^KT_JT_JT`ITaITaHTbGTbGTcFTcFTdETeDTeDTfCTfCTgBThBThATi@Ti@Tj?Tk?Tk>Tl=Tl=Tm@BDFHJLNPRTVVUUTTSRRQQPPONNM M L K K J J I HHGGFEEDDCBBAA@??>>==<;;::988776 5!5!4"4"3#2$2$1%1%0&/'/'.(.(-),),*++++*,),)-(.(.'/'/&0%1%1$2$2#3"4"4!5!5 677889::;;<<=>>??@AABBCDDEEFGGHH I J J K K L M MNNOOPQQRRSTTUUVVVVVVVVVVVVVVVVV V V V V V V V V VVVVVVVVVVVVVVVVVVVVVVVVVVVVVVV V V!V"V"V#V#V$V$V%V&V&V'V'V(V)~V)~V*}V*}V+|V,{V,{V-zV-zV.yV/xV/xV0wV0wV1vV2uV2uV3tV3tV4sV5sV5rV6qV6qV7pV7pV8oV9nV9nV:mV:mV;lViV?hV?hV@gV@gVAfVBeVBeVCdVCdVDcVEbVEbVFaVFaVG`VH`VH_VI^VI^VJ]VJ]VK\VL[VL[VMZVMZVNYVOXVOXVPWVPWVQVVRUVRUVSTVSTVTSVURVURVVQVVQVWPVXOVXOVYNVYNVZMV[MV[LV\KV\KV]JV]JV^IV_HV_HV`GV`GVaFVbEVbEVcDVcDVdCVeBVeBVfAVfAVg@Vh?Vh?Vi>Vi>Vj=Vk<97520. 
+ ) ' $ "          "#%'( * , - / 1 3 5 6 8 :<>@BDFHIKNPRTVXXXWVVUUTSSRRQPPO O N M M L L K JJIIHGGFFEDDCCBAA@@??>==<<;::99 8 7!7!6"6#5#4$4$3%3&2&1'1'0(0(/).*.*-+-+,,+-+-*.*.)/(0(0'1'1&2%3%3$4$4#5"6"6!7!7 899::;<<==>??@@ABBCCDEEFFGHHIIJJ K L L M M N O OPPQRRSSTUUVVWXXYYYYYYYYYYYYYYYY Y Y Y Y Y Y Y Y YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY Y!Y!Y"Y"Y#Y$Y$Y%Y%Y&Y'~Y'~Y(}Y(}Y)|Y){Y*{Y+zY+zY,yY,xY-xY.wY.wY/vY/uY0uY1tY1tY2sY2rY3rY4qY4qY5pY5oY6oY7nY7nY8mY8mY9lY:kY:kY;jY;jYgY>gY?fY@eY@eYAdYAdYBcYCbYCbYDaYDaYE`YF_YF_YG^YG^YH]YI\YI\YJ[YJ[YKZYLYYLYYMXYMXYNWYNVYOVYPUYPUYQTYQSYRSYSRYSRYTQYTPYUPYVOYVOYWNYWMYXMYYLYYLYZKYZJY[JY\IY\IY]HY]HY^GY_FY_FY`EY`EYaDYbCYbCYcBYcBYdAYe@Ye@Yf?Yf?Yg>Yh=Yh=Yi;9642/ - + ( & $ "        "#%'(* , - / 1 3 5 6 8 : <>@BDFGIKNPRTVXZZZYYXWWVVUTTSSRQ Q P P O N N M MLKKJJIHHGGFEEDDCBBAA@??>>=<<;; : 9!9!8"8#7#6$6$5%5&4&3'3'2(2)1)0*0*/+/,.,----,.,/+/*0*0)1)2(2'3'3&4&5%5$6$6#7#8"8!9!9 : ;;<<=>>??@AABBCDDEEFGGHHIJJKKLM M N N O P P Q QRSSTTUVVWWXYYZZ[[[[[[[[[[[[[[[[ [ [ [ [ [ [ [ [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[ [ [![!["[#[#[$[$~[%~[&}[&}['|['{[({[)z[)z[*y[*x[+x[,w[,w[-v[-u[.u[/t[/t[0s[0r[1r[2q[2q[3p[3o[4o[5n[5n[6m[6l[7l[8k[8k[9j[9i[:i[;h[;h[e[>e[?d[?c[@c[Ab[Ab[Ba[B`[C`[D_[D_[E^[E][F][G\[G\[H[[HZ[IZ[JY[JY[KX[KW[LW[MV[MV[NU[NT[OT[PS[PS[QR[QQ[RQ[SP[SP[TO[TN[UN[VM[VM[WL[WK[XK[YJ[YJ[ZI[ZH[[H[\G[\G[]F[]E[^E[_D[_D[`C[`B[aB[bA[bA[c@[c?[d?[e>[e>[f=[f<[g<[h;[h;[i:[i9[j9[k8[k8[l7[l6[m6[n5[n5[o4[o3[p3[p2[q2[r1[r0[s0[s/[t/[u.[u-[v-[v,[w,[x+[x*[y*[y)[z)[{([{'[|'[|&[}&[~%[~$[$[#[#["[![![ [ [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[ [ [ [ [ [ [ [ [[[[[[[[[[[[[[[ZWTROLJGEB@=;8631 / , * ( & # !       
"#%'(*, - / 1 3 5 6 8 : < >@BDFGIKMPRTVXZ\]\[[ZYYXXWVVUUTS S R R Q P P O ONMMLLKJJIIHGGFFEDDCCBAA@@?>>== < ;!;":":#9#8$8%7%7&6&5'5(4(4)3)2*2+1+1,0,/-/..../-/,0,1+1+2*3)3)4(4'5'6&6&7%7$8$9#9#:":!;!< < ==>??@@ABBCCDEEFFGHHIIJKKLLMNNO O P Q Q R R S TTUUVWWXXYZZ[[\]]]]]]]]]]]]]]]]] ] ] ] ] ] ] ] ]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]] ] ]!]"]"~]#~]#}]$}]%|]%{]&{]&z]'z](y](x])x])w]*w]+v]+u],u],t]-t].s].r]/r]/q]0q]1p]1o]2o]2n]3n]4m]4l]5l]5k]6k]7j]7i]8i]8h]9h]:g]:f];f];e]c]?b]?a]@a]@`]A`]B_]B^]C^]C]]D]]E\]E[]F[]FZ]GZ]HY]HX]IX]IW]JW]KV]KU]LU]LT]MT]NS]NR]OR]OQ]PQ]QP]QO]RO]RN]SN]TM]TL]UL]UK]VK]WJ]WI]XI]XH]YH]ZG]ZF][F][E]\E]]D]]C]^C]^B]_B]`A]`@]a@]a?]b?]c>]c=]d=]d<]e<]f;]f:]g:]g9]h9]i8]i7]j7]j6]k6]l5]l4]m4]m3]n3]o2]o1]p1]p0]q/]r/]r.]s.]t-]t,]u,]u+]v+]w*]w)]x)]x(]y(]z']z&]{&]{%]|%]}$]}#]~#]~"]"]!] ] ]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]] ] ] ] ] ] ] ] ]]]]]]]]]]]]]]]\YWTQOLIGDB?=:853 1 . , * ' % #       "#%'(*,- / 1 3 5 6 8 : < > @BDEGIKMPRTVXZ\^_^]]\\[ZZYYXWWVV U T T S S R Q QPPONNMMLKKJJIHHGFFEEDCCBBA@@?? >!=!="<"<#;$:$:%9%9&8'7'7(6(6)5*4*4+3+3,2-1-1.0////0.0.1-2,2,3+3+4*5)5)6(6(7'8&8&9%9%:$;#;#<"<"=!> > ??@AABBCDDEFFGGHIIJJKLLMMNOOPPQ R R S S T U U VVWXXYYZ[[\]]^^_________________ _ _ _ _ _ _ _ _______________________________ ~_!~_!}_"}_"|_#{_${_$z_%z_%y_&x_'x_'w_(w_(v_)u_*u_*t_+t_,s_,r_-r_-q_.p_/p_/o_0o_0n_1m_2m_2l_3l_3k_4j_5j_5i_6i_6h_7g_8g_8f_9f_9e_:d_;d_;c_a_>`_?`_?__@^_A^_A]_B]_C\_C[_D[_DZ_EY_FY_FX_GX_GW_HV_IV_IU_JU_JT_KS_LS_LR_MR_MQ_NP_OP_OO_PO_PN_QM_RM_RL_SL_SK_TJ_UJ_UI_VI_VH_WG_XG_XF_YF_ZE_ZD_[D_[C_\B_]B_]A_^A_^@__?_`?_`>_a>_a=_b<_c<_c;_d;_d:_e9_f9_f8_g8_g7_h6_i6_i5_j5_j4_k3_l3_l2_m2_m1_n0_o0_o/_p/_q._q-_r-_r,_s+_t+_t*_u*_u)_v(_w(_w'_x'_x&_y%_z%_z$_{$_{#_|"_}"_}!_~!_~ _______________________________ _ _ _ _ _ _ _ _______________^[YVSQNKIFDA?<:75 2 0 . 
+ ) ' $ "      "#%'(*,-/ 1 3 5 6 8 : < > @ BDEGIKMPRTVXZ\^aa``_^^]]\[[ZZYXX W W V U U T T SRRQPPOONMMLLKJJIIHGGFFEDDCCBA A @!?!?">#>#=$<$<%;&;&:'9'9(8)8)7*6*6+5,5,4-3.3.2/1/100101/2.2.3-4-4,5+5+6*7*7)8(8(9':':&;%;%<$=#=#>"?"?!@ @ ABBCCDEEFFGHHIIJKKLMMNNOPPQQRSS T T U V V W W XYYZ[[\\]^^__`aaaaaaaaaaaaaaaaa a a a a a a a a aaaaaaaaaaaaaaaaaaaaaaaaaaa~a~a}a }a |a!{a!{a"za#za#ya$xa$xa%wa&va&va'ua(ua(ta)sa)sa*ra+ra+qa,pa,pa-oa.oa.na/ma/ma0la1la1ka2ja2ja3ia4ha4ha5ga6ga6fa7ea7ea8da9da9ca:ba:ba;aa^a?^a?]a@\a@\aA[aB[aBZaCYaCYaDXaEWaEWaFVaGVaGUaHTaHTaISaJSaJRaKQaKQaLPaMPaMOaNNaNNaOMaPMaPLaQKaQKaRJaSIaSIaTHaUHaUGaVFaVFaWEaXEaXDaYCaYCaZBa[Ba[Aa\@a\@a]?a^?a^>a_=a_=a`;97 4 2 / - + ( & $ "     "#%'(*,-/1 3 5 6 8 : < > @ B DEGIKMPRTVXZ\^acccbaa`__^^]\\[[Z Y Y X X W V V UTTSSRQQPPONNMMLKKJIIHHGFFEEDC C B!B"A"@#@#?$>%>%=&=&<';(;(:):)9*8+8+7,7-6-5.5.4/30302121120303/4/4.5-6-6,7,8+8*9*9):(;(;'<'<&=%>%>$?$?#@"A"A!B!C CDDEFFGGHIIJJKLLMNNOOPQQRRSTTUU V W W X Y Y Z Z[\\]]^__``abbcdddddddddddddddd d d d d d d d dddddddddddddddddddddddd~d~d}d}d|d{d{d zd zd!yd"xd"xd#wd$vd$vd%ud%ud&td'sd'sd(rd(rd)qd*pd*pd+od+od,nd-md-md.ld/kd/kd0jd0jd1id2hd2hd3gd3gd4fd5ed5ed6dd6dd7cd8bd8bd9ad:`d:`d;_d;_d<^d=]d=]d>\d>\d?[d@Zd@ZdAYdAYdBXdCWdCWdDVdEUdEUdFTdFTdGSdHRdHRdIQdIQdJPdKOdKOdLNdLNdMMdNLdNLdOKdPJdPJdQIdQIdRHdSGdSGdTFdTFdUEdVDdVDdWCdWCdXBdYAdYAdZ@d[?d[?d\>d\>d]=d^ @ B D EGIKMORTVXZ\^aceeeddcbba``__^]]\ \ [ Z Z Y Y X WWVUUTTSRRQQPOONMMLLKJJIIHGGFE E D!D"C"B#B$A$A%@%?&?'>'=(=(<)<*;*:+:+9,9-8-7.7/6/60504142322323141505/6/7.7.8-8,9,:+:*;*;)<)=(='>'?&?&@%@$A$B#B"C"C!D!E EFGGHHIJJKKLMMNNOPPQRRSSTUUVVWX X Y Z Z [ [ \ ]]^^_``abbccdeeffffffffffffffff f f f f f f f ffffffffffffffffffff~f~f}f}f|f{f{fzfyfyfxf xf!wf!vf"vf#uf#uf$tf$sf%sf&rf&qf'qf'pf(pf)of)nf*nf+mf+mf,lf,kf-kf.jf.jf/if/hf0hf1gf1ff2ff2ef3ef4df4cf5cf6bf6bf7af7`f8`f9_f9^f:^f:]f;]f<\f<[f=[f>Zf>Zf?Yf?Xf@XfAWfAVfBVfBUfCUfDTfDSfESfFRfFRfGQfGPfHPfIOfINfJNfJMfKMfLLfLKfMKfNJfNJfOIfOHfPHfQGfQGfRFfREfSEfTDfTCfUCfUBfVBfWAfW@fX@fY?fY?fZ>fZ=f[=f\ @ B C E 
GIKMORTVXZ\^aceghgffeedccbba``_ ^ ^ ] ] \ [ [ Z YYXXWVVUUTSSRQQPPONNMMLKKJIIHH G!F!F"E"E#D$C$C%B&A&A'@'@(?)>)>*=*<+<,;,;-:.9.9/8/8071616252434434352616170708/9.9.:-;,;,<+<+=*>)>)?(?(@'A&A&B%C$C$D#D#E"F!F!G GHIIJKKLLMNNOOPQQRSSTTUVVWXXYYZ [ [ \ \ ] ^ ^ _``aabccddeffghhhhhhhhhhhhhhhhh h h h h h h h hhhhhhhhhhhhhhhh~h~h}h}h|h{h{hzhyhyhxhxhwhvh vh uh!uh"th"sh#sh#rh$qh%qh%ph&ph&oh'nh(nh(mh)lh*lh*kh+kh+jh,ih-ih-hh.hh.gh/fh0fh0eh1dh2dh2ch3ch3bh4ah5ah5`h6`h7_h7^h8^h8]h9\h:\h:[h;[h;ZhXh?Wh?Vh@Vh@UhAThBThBShCShCRhDQhEQhEPhFOhGOhGNhHNhHMhILhJLhJKhKKhKJhLIhMIhMHhNGhOGhOFhPFhPEhQDhRDhRChSChTBhTAhUAhU@hV?hW?hW>hX>hX=hY @ B C E G IKMORTVXZ\^acegjjiihhgffeddccba a ` _ _ ^ ^ ] \\[ZZYYXWWVVUTTSRRQQPOONMMLLKJ J I!I!H"G#G#F$E$E%D&D&C'B(B(A)@)@*?+?+>,=-=-<.;.;/:0:091818273736455554636372828190:0:/;.;.<-=-=,>+?+?*@*@)A(B(B'C&C&D%E%E$F#G#G"H!H!I J JKLLMMNOOPPQRRSTTUUVWWXYYZZ[\\ ] ^ ^ _ _ ` a abbcddeffgghiijjjjjjjjjjjjjjjjj j j j j j j j jjjjjjjjjjjj~j~j}j}j|j{j{jzjyjyjxjxjwjvjvjujtjtj sj!sj!rj"qj"qj#pj$oj$oj%nj%nj&mj'lj'lj(kj)kj)jj*ij*ij+hj,gj,gj-fj.fj.ej/dj/dj0cj1bj1bj2aj2aj3`j4_j4_j5^j6^j6]j7\j7\j8[j9Zj9Zj:Yj;Yj;XjUj>Uj?Tj@Tj@SjARjARjBQjCPjCPjDOjDOjENjFMjFMjGLjHLjHKjIJjIJjJIjKHjKHjLGjMGjMFjNEjNEjODjPCjPCjQBjRBjRAjS@jS@jT?jU?jU>jV=jV=jW < 9 7 4 2 0 - + (&$"     !#%&(*,-/13468: < > @ B C E G I K MORTVXZ\^`cegjlllkjjiihggfeedd c b b a ` ` _ _^]]\[[ZZYXXWWVUUTSSRRQPPONNMM L K!K"J"I#I#H$H%G%F&F'E'D(D(C)C*B*A+A,@,?-?->.>/=/<0<0;1:2:293948475756657574849392:2:1;1<0.>-?-?,@,A+A*B*C)C(D(D'E'F&F%G%H$H#I#I"J"K!K L MMNNOPPQRRSSTUUVVWXXYZZ[[\]]^_ _ ` ` a b b c ddeefgghiijjkllmmmmmmmmmmmmmmmm m m m m m m m mmmmmmmm~m~m}m}m|m{m{mzmymymxmxmwmvmvmumtmtmsmsmrm qm qm!pm!om"om#nm#nm$mm$lm%lm&km&jm'jm(im(im)hm)gm*gm+fm+em,em-dm-dm.cm.bm/bm0am0`m1`m2_m2_m3^m3]m4]m5\m5[m6[m7Zm7Zm8Ym8Xm9Xm:Wm:Vm;VmSm?Rm?Rm@QmAPmAPmBOmBNmCNmDMmDMmELmFKmFKmGJmGImHImIHmIHmJGmJFmKFmLEmLDmMDmNCmNCmOBmOAmPAmQ@mQ?mR?mS>mS>mT=mT ; 9 6 4 1 / - *(&#!     
!#%&(*,-/13468:< > @ B C E G I K M ORTVXZ\^`cegilnonmmllkjjihhggf e e d c c b b a``_^^]]\[[ZYYXXWVVUTTSRRQQPOO N M!M"L"L#K$J$J%I%H&H'G'G(F)E)E*D*C+C,B,B-A.@.@/?/>0>1=1=2<3;3;4:495968687786869594:4;3;3<2=1=1>0?/?/@.@.A-B,B,C+D*D*E)E)F(G'G'H&I%I%J$J$K#L"L"M!N N OOPQQRSSTTUVVWXXYYZ[[\]]^^_``a b b c c d e e fgghhijjkllmmnoooooooooooooooo o o o o o o o ooooo~o~o}o|o|o{o{ozoyoyoxowowovovouototosororoqoqopooo oo no!mo"mo"lo#lo#ko$jo%jo%io&ho'ho'go(go(fo)eo*eo*do+co,co,bo-bo.ao.`o/`o/_o0^o1^o1]o2]o3\o3[o4[o4Zo5Yo6Yo6Xo7Xo8Wo8Vo9Vo9Uo:To;To;SoQo>Po?Oo@Oo@NoANoBMoBLoCLoCKoDJoEJoEIoFIoGHoGGoHGoHFoIEoJEoJDoKDoLCoLBoMBoMAoN@oO@oO?oP?oQ>oQ=oR=oR @ B C E G I K M O QTVXZ\^`cegilnpqppoonmmlkkjiih h g f f e d d ccbaa`__^^]\\[ZZYYXWWVUUTSSRRQ P!P!O"N"N#M$M$L%K&K&J'I(I(H)H)G*F+F+E,D-D-C.B.B/A0A0@1?2?2>3=3=4<5<5;6:7:79889897:7:6;5<5<4=3>3>2?1?1@0A0A/B.C.C-D,D,E+F+F*G)H)H(I'J'J&K&K%L$M$M#N"O"O!P P QRRSTTUUVWWXYYZ[[\\]^^_``aabcc d e e f f g h hijjkklmmnoopqqqqqqqqqqqqqqqqq q q q q q q q q~q~q}q|q|q{q{qzqyqyqxqwqwqvqvquqtqtqsqrqrqqqqqpqoqoqnqmqmq lq!kq!kq"jq"jq#iq$hq$hq%gq&fq&fq'eq(eq(dq)cq)cq*bq+aq+aq,`q-`q-_q.^q.^q/]q0\q0\q1[q2[q2Zq3Yq3Yq4Xq5Wq5Wq6Vq7Uq7Uq8Tq8Tq9Sq:Rq:Rq;QqOq>Nq?Mq?Mq@LqAKqAKqBJqCJqCIqDHqDHqEGqFFqFFqGEqHDqHDqICqICqJBqKAqKAqL@qM?qM?qN>qO>qO=qP@ B C E G I K M O Q TVXZ\^`cegilnpsssrrqpponnmllkk j i i h g g f feddcbba``__^]]\[[ZZYXXWVVUTTS S!R!Q"Q#P#O$O%N%M&M&L'L(K(J)J*I*H+H,G,G-F-E.E/D/C0C1B1A2A2@3@4?4>5>6=6<7<8;8;9:99:9;8;7<7=6=5>5>4?4@3@2A2B1B0C0D/D/E.E-F-G,G+H+I*I)J)K(K(L'L&M&N%N$O$P#P"Q"Q!R!S STUUVWWXXYZZ[\\]]^__`aabccddef f g h h i i j kklmmnooppqrrstttttttttttttttt t t t t ~t ~t }t |t|t{t{tztytytxtwtwtvtvtutttttstrtrtqtptptototntmtmtltktkt jt it!it!ht"ht#gt#ft$ft%et%dt&dt'ct'ct(bt(at)at*`t*_t+_t,^t,]t-]t-\t.\t/[t/Zt0Zt1Yt1Xt2Xt3Wt3Wt4Vt4Ut5Ut6Tt6St7St8Rt8Qt9Qt9Pt:Pt;Ot;NtLt?Kt?Kt@Jt@ItAItBHtBGtCGtDFtDEtEEtFDtFDtGCtGBtHBtIAtI@tJ@tK?tK>tL>tL=tM=tN < 9 7 4 2 0-+)&$"      !#%&(*,-/13468:<>@A C E G I K M O Q T VXZ\^`cegilnpsuvuutssrqqpponnm l l k j j i i 
hggfeedccbba``_^^]\\[[ZYYXWWV U U!T"T"S#R#R$Q%P%P&O'N'N(M)M)L*K*K+J,I,I-H.G.G/F0F0E1D1D2C3B3B4A5@5@6?7?7>8=8=9<:;:;;:<9<9=8>8>7?6?6@5A4A4B3C2C2D1E1E0F/F/G.H-H-I,J+J+K*L*L)M(M(N'O&O&P%Q%Q$R#S#S"T!T!U VVWXXYYZ[[\]]^__``abbcddeffggh i i j k k l m mnnoppqrrsttuuvvvvvvvvvvvvvvvv ~v ~v }v |v |v {v {v zvyvyvxvwvwvvvuvuvtvtvsvrvrvqvpvpvovnvnvmvmvlvkvkvjvivivhv gv gv!fv"fv"ev#dv$dv$cv%bv&bv&av'`v'`v(_v)_v)^v*]v+]v+\v,[v,[v-Zv.Yv.Yv/Xv0Xv0Wv1Vv2Vv2Uv3Tv3Tv4Sv5Sv5Rv6Qv7Qv7Pv8Ov9Ov9Nv:Mv:Mv;LvJv>Iv?Hv@Hv@GvAFvAFvBEvCEvCDvDCvECvEBvFAvGAvG@vH?vH?vI>vJ>vJ=vK ; 9 6 4 1/-*(&#!      !#%&(*,-/13468:<>@ACE G I K M O Q T V XZ\^`cegilnpsuxxxwvvuttssrqqpo o n m m l l k jjihhgffeedccbaa`__^]]\\[ZZYX X W!V"V"U#U$T$S%S&R&Q'Q'P(O)O)N*N+M+L,L-K-J.J/I/H0H0G1F2F2E3E4D4C5C6B6A7A7@8?9?9>:>;=;<<<=;=:>:>9?8@8@7A7B6B5C5D4D3E3F2F1G1G0H/I/I.J.K-K,L,M+M*N*N)O(P(P'Q'R&R%S%T$T#U#U"V!W!W X YYZ[[\]]^^_``abbcddeefgghiijk k l l m n n o ppqrrsttuuvwwxyyyyyyyyyyyy~y~y}y|y |y {y {y zy yy yy xy wywyvyuyuytytysyryryqypypyoynynymylylykykyjyiyiyhygygyfyey ey!dy!dy"cy#by#by$ay$`y%`y&_y&^y'^y(]y(]y)\y*[y*[y+Zy,Yy,Yy-Xy-Wy.Wy/Vy/Uy0Uy1Ty1Ty2Sy3Ry3Ry4Qy4Py5Py6Oy6Ny7Ny8My8My9Ly:Ky:Ky;Jy;IyGy?Fy?Fy@EyADyADyBCyCByCByDAyD@yE@yF?yF>yG>yH=yH=yI@ACEG I K M O Q T V X Z\^`cegilnpsuxz{zyyxxwvvuttsrr q q p o o n m mlkkjiihhgffeddcbba``__^]]\[[ Z!Y!Y"X#W#W$V$V%U&T&T'S(R(R)Q*P*P+O,N,N-M-M.L/K/K0J1I1I2H3G3G4F5E5E6D6D7C8B8B9A:@:@;?<><>==><>E{>D{?C{@C{@B{AA{BA{B@{C?{D?{D>{E>{E={F<{G<{G;{H:{I:{I9{J8{K8{K7{L6{M6{M5{N5{N4{O3{P3{P2{Q1{R1{R0{S/{T/{T.{U-{V-{V,{W,{W+{X*{Y*{Y){Z({[({['{\&{]&{]%{^${_${_#{`#{`"{a!{b!{b {c{d{d{e{f{f{g{h{h{i{i{j{k{k{l{m{m{n{o{o{p{q{q{r{r{s{t{t{u {v {v {w {x {x {y {y {z{{{{{|{}{}{~{{{{{{{zwtqnkheb_\ZWTQO L I G D B ? 
= : 8530.,)'%"        !#%&(*,-/13468:<>@ACEGI K M O Q T V X Z \^`cegilnpsuxz}}}|{{zyyxwwvvut t s r r q p p onnmllkkjiihggfeedccbba``_^^] \!\!["Z#Z#Y$X%X%W&W'V'U(U)T)S*S*R+Q,Q,P-O.O.N/N0M0L1L2K2J3J3I4H5H5G6F7F7E8D9D9C:C;B;A?>>?=@=@B~>B~?A~?@~@@~A?~A>~B>~C=~C=~D<~E;~E;~F:~G9~G9~H8~H7~I7~J6~J5~K5~L4~L3~M3~N2~N2~O1~P0~P0~Q/~R.~R.~S-~S,~T,~U+~U*~V*~W)~W)~X(~Y'~Y'~Z&~[%~[%~\$~]#~]#~^"~^!~_!~` ~`~a~b~b~c~d~d~e~f~f~g~g~h~i~i~j~k~k~l~m~m~n~o~o~p~q~q~r~r ~s ~t ~t ~u ~v ~v ~w ~x~x~y~z~z~{~{~|~}~}~~~~~|yvspmjgda_\YVSQ N K I F D A > < 97520-+)&$"       !#%&(*,-/13468:<>@ACEGIK M O Q S V X Z \ ^`cegilnpsuxz}~~~~~~}~}~|~{~{~z~y~y~x~w~w ~v ~u ~u ~t ~s ~s ~r ~r~q~p~p~o~n~n~m~l~l~k~j~j~i~h~h~g~f~f~e~e~d~c~c~b~a~a~` ~_ ~_!~^"~]"~]#~\$~[$~[%~Z%~Z&~Y'~X'~X(~W)~V)~V*~U+~T+~T,~S-~R-~R.~Q/~P/~P0~O1~N1~N2~M2~M3~L4~K4~K5~J6~I6~I7~H8~G8~G9~F:~E:~E;~D<~C<~C=~B=~B>~A?~@?~@@~?A~>A~>B~=C~????>@=A=A ;9642/-*(&#!       !#%&(*,-/13468:<>@ACEGIKM O Q S V X Z \ ^ `cegilnpsuxz}~|{{{{{{~{~{}{|{|{{{{{z{y {y {x {w {w {v {u {u{t{s{s{r{q{q{p{o{o{n{m{m{l{l{k{j{j{i{h{h{g{f{f{e{d{d{c{b {b {a!{`"{`"{_#{^${^${]%{]&{\&{['{[({Z({Y){Y*{X*{W+{W,{V,{U-{U-{T.{S/{S/{R0{Q1{Q1{P2{O3{O3{N4{N5{M5{L6{L7{K7{J8{J9{I9{H:{H;{G;{F<{F<{E={D>{D>{C?{B@{B@{AA{@B{@B{?C{?D{>D{=E{=F{=>>=>@ACEGIKMO Q S V X Z \ ^ ` cegilnpsuxz}}|zyyyyyyyyyy~y~y}y|y| y{ yz yz yy yx yx ywyvyvyuyuytysysyryqyqypyoyoynymymylykykyjyiyiyhygygyfyeye yd!yc!yc"yb#yb#ya$y`%y`%y_&y^'y^'y](y\(y\)y[*yZ*yZ+yY,yX,yX-yW.yV.yV/yU0yT0yT1yS2yR2yR3yQ4yP4yP5yO6yO6yN7yM8yM8yL9yK:yK:yJ;yI;yIyF?yE?yE@yDAyCAyCByBCyACyADy@Ey?Ey?Fy>Gy=Gy=Hy;=;=<<=;=;>:?:?9@8A8A7B6C6C5D4E4E3F2G2G1H0I0I/J.K.K-L,L,M+N*N*O)P(P(Q'R'R&S%T%T$U#V#V"W!X!X YZZ[\\]^^__`aabccdeefgghiijkk l m m n o o pqqrrsttuvvwxxz|}zwtqnkheb`]ZW T R O L J G D B ?=:8530.,)'%"        !#%&(*,-/13468:<>@ACEGIKMOQ S V X Z \ ^ ` c egilnpsuxz|}{ywvvvvvvvvvvvvvv~ v~ v} v| v| v{ vz vzvyvxvxvwvvvvvuvtvtvsvrvrvqvpvpvovnvnvmvmvlvkvkvjvivivhvg 
vg!vf!ve"ve#vd#vc$vc%vb%va&va'v`'v_(v_)v^)v]*v]+v\+v[,v[-vZ-vY.vY/vX/vW0vW1vV1vU2vU3vT3vS4vS5vR5vQ6vQ7vP7vP8vO9vN9vN:vM:vL;vLvI>vH?vH@vG@vFAvFBvEBvDCvDDvCDvBEvBFvAFv@Gv@Hv?Hv>Iv>Jv=Jv8>9=9<:<:;;:<:<9=9>8>7?7@6@5A5B4B3C3D2D1E1F0F/G/H.H-I-J,J+K+L*L)M)N(N'O'P&P%Q%R$R#S#T"T!U!V VWWXYYZ[[\]]^__`aabccdeefgghi i j k k l m mnoopqqrrsttuvwz|}zwtpmkheb_\Y W T Q N L I F D A?<:7520-+)&$"        !#%&(*+-/13468:<>?ACEGIKMOQSV X Z \ ^ ` c e g ilnpsuxz|~|zxvttttttttttttttt t t t t~ t~ t} t|t|t{tztztytxtxtwtvtvtutttttstrtrtqtptptotntntmtltltktj tj ti!th"th"tg#tf$tf$te%td&td&tc'tb(tb(ta)t`*t`*t_+t^,t^,t]-t\.t\.t[/tZ0tZ0tY1tX2tX2tW3tV4tV4tU5tT5tT6tS7tS7tR8tQ9tQ9tP:tO;tO;tNtK?tK?tJ@tIAtIAtHBtGCtGCtFDtEEtEEtDFtCGtCGtBHtAItAIt@Jt?Kt?Kt>Lt=Mt=Mt6=6=7<8;8;9::9:9;8<7<7=6>5>5?4@4@3A2A2B1C0C0D/E.E.F-G,G,H+I*I*J)K(K(L'M&M&N%O$O$P#Q"Q"R!S S TUUVWWXYYZ[[\]]^__`aabccdeef g g h i i j kklmmnoopqqrssuwz||yvspmjgda^[ Y V S P N K H F CA><9742/-+(&$!         !#%&(*+-/13468:<>?ACEGIKMOQSVX Z \ ^ ` c e g i lnpsuxz|}{zxvtrqqqqqqqqqqqqqq q q q q q q qq~q}q}q|q{q{qzqyqyqxqwqwqvququqtqsqsqrqrqqqpqpqoqnqnqm ql ql!qk"qj"qj#qi$qh$qh%qg&qf&qf'qe(qd(qd)qc*qb*qb+qa,q`,q`-q_.q^.q^/q]0q\0q\1q[2qZ2qZ3qY4qX4qX5qW6qV6qV7qU8qT8qT9qS:qR:qR;qQqN>qN?qM@qL@qLAqKBqJBqJCqIDqHDqHEqGFqFFqFGqEHqDHqDIqCJqBJqBKqALq@Lq@Mq?Nq>Nq>Oq=Pq3>3=4<5<5;6:7:79889897:6;6;5<4=4=3>2?2?1@0A0A/B.C.C-D,E,E+F*G*G)H(I(I'J&K&K%L$M$M#N"O"O!P Q QRSSTUUVWWXYYZ[[\]]^__`aabcc d e e f g g hiijkklmmnoopqruwz|{xurolifca ^ [ X U S P M J H EC@=;8641/,*(%#!         
!#%&(*+-/13468:<>?ACEGIKMOQSVXZ \ ^ ` c e g i l npsuxz|}{ywusqonnnnnnnnnnnnn n n n n n n n nnnnnn~n}n}n|n{n{nznynynxnwnwnvnununtnsnsnrnqnqnpno no!nn!nm"nm#nl#nk$nk%nj%ni&ni'nh'ng(ng)nf)ne*ne+nd+nc,nc-nb-na.na/n`/n_0n_1n^1n]2n]3n\3n[4n[5nZ5nY6nY7nX7nW8nW9nV9nU:nU;nT;nSnQ?nP?nO@nOAnNAnMBnMCnLCnKDnKEnJEnIFnIGnHGnGHnGInFInEJnEKnDKnCLnCMnBMnANnAOn@On?Pn>Qn>Qn=Rn0=1=2<2;3;4:4959686777868595:4:3;3<2<1=1>0>/?/@.@-A-B,B+C+D*D)E)F(F'G'H&H%I%J$J#K#L"L!M!N NOPPQRRSTTUVVWXXYZZ[\\]^^_`` a b b c d d e ffghhijjklmmnoruwz|~{xurolifc ` ] Z W U R O M JGEB?=:8531.,)'%"       !!   !#%&(*+-/13468:<>?ACEGIKMOQSVXZ\ ^ ` c e g i l n psuxz~||zxvtrpolkkkkkkkkkkkkk k k k k k k k kkkkkkkkkk~k}k}k|k{k{kzkykykxkwkwkvkukuktkskskr kq!kq!kp"ko#ko#kn$km%km&kl&kk'kj(kj(ki)kh*kh*kg+kf,kf,ke-kd.kd.kc/kb0kb0ka1k`2k`2k_3k^4k^4k]5k\6k\6k[7kZ8kZ8kY9kX:kX:kW;kVkT>kS?kR@kR@kQAkPBkPBkOCkNDkNDkMEkLFkLFkKGkJHkJIkIIkHJkHKkGKkFLkEMkEMkDNkCOkCOkBPkAQkAQk@Rk?Sk?Sk>Tk=Uk=Uk->.=/-?,?,@+A*A*B)C(C(D'E&E&F%G$G#H#I"I!J!K LLMNNOPPQRRSTTUVVWXXYZZ[\\]^ ^ _ ` ` a b b cddeffghhijjkmoruwz|}zwtqnkhe b _ \ Z W T Q O LIGDA?<:7520.+)&$"       !!!    !#%&(*+-/13468:<>?ACEGIKMOQSUXZ\^ ` c e g i l n p suwz}|{zxvtrpnljiiiiiiiiiiiii i i i i i i i iiiiiiiiiiiiii~i}i}i|i{i{iziyiyixiwiviviu it it!is"ir"ir#iq$ip$ip%io&in&in'im(il(il)ik*ij*ij+ii,ih,ih-ig.if/if/ie0id1id1ic2ib3ib3ia4i`5i_5i_6i^7i]7i]8i\9i[9i[:iZ;iY;iYiV?iU?iU@iTAiSAiSBiRCiQCiQDiPEiOFiOFiNGiMHiMHiLIiKJiKJiJKiILiHLiHMiGNiFNiFOiEPiDPiDQiCRiBRiBSiATi@Ti@Ui?Vi>Vi>Wi=Xi+=,=,<-;.;.:/909081727263545445363627181809/:/:.;-<,<,=+>*>*?)@(@(A'B&C&C%D$E$E#F"G"G!H I IJKKLMMNOOPQQRSSTUUVWWXYZZ[ \ \ ] ^ ^ _ ` `abbcddeffghhjmoruwz|}zvspmjg d b _ \ Y V S Q NKIFCA><9742/-+(&$!      ""!!    
!#%&(*+-/12468:<>?ACEGIKMOQSUXZ\^` b e g i l n p s uw~z}|{ywusqomkigfffffffffffff f f f f f f f ffffffffffffffffff~f}f|f|f{fzfzfyfxfx fw fv!fv"fu#ft#ft$fs%fr%fr&fq'fp'fp(fo)fn)fn*fm+fl+fk,fk-fj-fi.fi/fh/fg0fg1ff1fe2fe3fd4fc4fc5fb6fa6fa7f`8f_8f_9f^:f]:f];f\fY>fX?fX@fW@fVAfVBfUBfTCfTDfSEfREfRFfQGfPGfPHfOIfNIfNJfMKfLKfLLfKMfJMfINfIOfHOfGPfGQfFQfERfESfDSfCTfCUfBVfAVfAWf@Xf?Xf?Yf>Zf=Zf=[f<\f;\f;]f:^f9^f8_f8`f7`f6af6bf5bf4cf4df3df2ef2ff1gf0gf0hf/if.if.jf-kf,kf,lf+mf*mf*nf)of(of'pf'qf&qf%rf%sf$sf#tf#uf"uf!vf!wf xfxfyfzfzf{f|f|f}f~f~ffffffffffffffffff f f f f f f fffffffffffffffeddcbba``_^^] \ \ [ Z Z Y XXWVVUTSSRQQPOONMMLKKJIIHGG F E!E"D"C#B$B$A%@&@&?'>(>(=)<*<*;+:,:-9-8.8/7/60615142433324151506/7/7.8-9-9,:+;+;*<)=)>(>'?'@&@%A%B$B#C#D"D!E F FGHHIJJKLLMNOOPQQRSSTUUVWWX Y Y Z [ [ \ ] ]^_``abbcddefgjmoruwz||yvspmj g d a ^ [ X V S PMKHEC@>;9641/,*(%#!      """!!!   !#%&(*+-/12468:<>?ACEGIKMOQSUXZ\^`b e g i l n p s u w~z||zxvtrpomjhfdccccccccccccc c c c c c c c ccccccccccccccccccccc~c~c}c|c|c{cz cz!cy!cx"cx#cw#cv$cv%cu%ct&ct'cs'cr(cq)cq*cp*co+co,cn,cm-cm.cl.ck/ck0cj0ci1ci2ch2cg3cg4cf4ce5cd6cd7cc7cb8cb9ca9c`:c`;c_;c^c\?c[?cZ@cZAcYAcXBcWCcWDcVDcUEcUFcTFcSGcSHcRHcQIcQJcPJcOKcOLcNLcMMcMNcLNcKOcKPcJQcIQcHRcHScGScFTcFUcEUcDVcDWcCWcBXcBYcAYc@Zc@[c?[c>\c>]c=]c<^c;_c;`c:`c9ac9bc8bc7cc7dc6dc5ec5fc4fc3gc3hc2hc1ic1jc0jc/kc.lc.mc-mc,nc,oc+oc*pc*qc)qc(rc(sc'sc&tc&uc%uc$vc$wc#wc"xc!yc!zc zc{c|c|c}c~c~cccccccccccccccccccccc c c c c c c cccccccccccccccbba``_^]]\[[ Z Y Y X W W V UUTSSRQQPONNMLLKJJIHHGFFEDD C!B!A"A#@#?$?%>%=&='<';(;):)9*9+8+7,7-6.5.4/40302122120304/4.5.6-6,7,8+8*9*:);(;'<'=&=%>%?$?#@#A"A!B!C CDEEFGGHIJJKLLMNNOPPQRRSTTU V W W X Y Y Z [[\]]^__`aabcegjmoruwz~|{xurol i f c ` ] [ X U ROMJGEB@=;8631.,)'%"       ##""!!!    
!#%&(*+-/12468:<>?ACEGIKMOQSUXZ\^`be g i l n p s u w} z{|yxvtrpnljhfda````````````` ` ` ` ` ` ` ``````````````````````````~`~`} `|!`|"`{"`z#`z$`y$`x%`x&`w&`v'`u(`u(`t)`s*`s*`r+`q,`q-`p-`o.`o/`n/`m0`m1`l1`k2`j3`j3`i4`h5`h5`g6`f7`f8`e8`d9`d:`c:`b;`b<`a<``=`_>`_>`^?`]@`]@`\A`[B`[C`ZC`YD`YE`XE`WF`WG`VG`UH`TI`TI`SJ`RK`RK`QL`PM`PN`ON`NO`NP`MP`LQ`LR`KR`JS`IT`IT`HU`GV`GV`FW`EX`EY`DY`CZ`C[`B[`A\`A]`@]`?^`>_`>_`=``">#=$<$<%;&:&:'9(8(8)7*6*5+5,4,3-3.2/1/1001/1/2.3-3-4,5+5*6*7)7(8(9':&:&;%<$<$=#>">"?!@ @ABBCDEEFGGHIIJKKLMMNOPPQRRS T T U V V W XXYZ[[\]]^__`begjmoruwz~|{xtqn k h e c ` ] Z W TROLIGDB?<:7530.+)'$"       ###"""!!    !#%&(*+-/12468:<>?ACEGIKMOQSUXZ\^`begi l n p s u~ w} z{ |ywusqomkigeca_]]]]]]]]]]]]] ] ] ] ] ] ] ]]]]]]]]]]]]]]]]]]]]]]]]]]] ] ]!]~"]~"]}#]|$]|$]{%]z&]y']y']x(]w)]w)]v*]u+]u+]t,]s-]s-]r.]q/]q0]p0]o1]n2]n2]m3]l4]l4]k5]j6]j6]i7]h8]h8]g9]f:]e;]e;]d<]c=]c=]b>]a?]a?]`@]_A]_A]^B]]C]\D]\D][E]ZF]ZF]YG]XH]XH]WI]VJ]VJ]UK]TL]SM]SM]RN]QO]QO]PP]OQ]OQ]NR]MS]MS]LT]KU]JV]JV]IW]HX]HX]GY]FZ]FZ]E[]D\]D\]C]]B^]A_]A_]@`]?a]?a]>b]=c]=c] = =!<";";#:$9$8%8&7'6'6(5)4)4*3+2+2,1-0-0.//.0-0-1,2+2+3*4)4)5(6'6'7&8%9$9$:#;";"??@ABBCDDEFFGHHIJKKLMMNOOP Q Q R S S T UVVWXXYZZ[\\]_begjmoruwz}|zwtq n k h e b _ \ Y VTQNLIFDA><9742/-+(&$!      $$###""!!!  !#%&(*+-/12468:<=?ACEGIKMOQSUXZ\^`begil n p s u~ w| zz |x vtrpomkhfdb`^\[[[[[[[[[[[[[ [ [ [ [ [ [ [[[[[[[[[[[[[[[[[[[[[[[[[[[ [ [!["[#[#[$[~%[~%[}&[|'[{'[{([z)[y*[y*[x+[w,[w,[v-[u.[u.[t/[s0[r0[r1[q2[p3[p3[o4[n5[n5[m6[l7[k7[k8[j9[i9[i:[h;[g<[g<[f=[e>[e>[d?[c@[b@[bA[aB[`C[`C[_D[^E[^E[]F[\G[\G[[H[ZI[YI[YJ[XK[WL[WL[VM[UN[UN[TO[SP[RP[RQ[QR[PS[PS[OT[NU[NU[MV[LW[LW[KX[JY[IY[IZ[H[[G\[G\[F][E^[E^[D_[C`[B`[Ba[Ab[@c[@c[?d[>e[>e[=f[>=<<; :!9!9"8#7#7$6%5%5&4'3(3(2)1*0*0+/,.,.--.,.,/+0*1*1)2(3'3'4&5%5%6$7#8#8"9!: : ;<<=>>?@AABCCDEEFGHHIJJKLLM N N O P Q Q RSSTUUVWXXYZZ\_begjmoruwz}|yvs p m j g d a ^ \YVSPNKHFC@>;9641/,*(%#!       $$$###"""!! ! 
#%&(*+-/12468:<=?ACEGIKMOQSUXZ\^`begiln p s u} w{ zy |x v trpnljhfdb_][YXXXXXXXXXXXXX X X X X X X XXXXXXXXXXXXXXXXXXXXXXXXXXX X!X!X"X#X#X$X%X&X&X'X~(X}(X})X|*X{*X{+Xz,Xy-Xy-Xx.Xw/Xv/Xv0Xu1Xt1Xt2Xs3Xr4Xr4Xq5Xp6Xp6Xo7Xn8Xm8Xm9Xl:Xk;Xk;XjXg?Xf?Xf@XeAXdBXdBXcCXbDXbDXaEX`FX_FX_GX^HX]HX]IX\JX[KX[KXZLXYMXXMXXNXWOXVOXVPXUQXTRXTRXSSXRTXQTXQUXPVXOVXOWXNXXMYXMYXLZXK[XJ[XJ\XI]XH]XH^XG_XF`XF`XEaXDbXCbXCcXBdXAdXAeX@fX?gX?gX>hX=iX==<;;:988 7!6"6"5#4$4$3%2&1&1'0(/)/).*-+-+,,+-*-*.)/(0(0'1&2&2%3$4#4#5"6!7!7 899:;;<=>>?@@ABBCDDEFGGHIIJ K K L M N N OPPQRRSTUUVWWY\_begjmoruwz||yv r o l i f d a ^[XURPMJHEB@=;8631.,*'%"        %%$$$##"""!! !! # %&(*+-/12468:<=?ACEGIKMOQSUXZ\^`begilnp s~ u| w{ zy |w u s qomkigeca_]ZXVUUUUUUUUUUUUU U U U U U U UUUUUUUUUUUUUUUUUUUUUUUUUUU U!U"U"U#U$U$U%U&U&U'U(U)U)U*U~+U}+U},U|-U{-U{.Uz/Uy0Ux0Ux1Uw2Uv2Uv3Uu4Ut5Ut5Us6Ur7Uq7Uq8Up9Uo9Uo:Un;UmUj>Uj?Ui@Uh@UhAUgBUfCUfCUeDUdEUcEUcFUbGUaGUaHU`IU_JU^JU^KU]LU\LU\MU[NUZOUZOUYPUXQUWQUWRUVSUUSUUTUTUUSVUSVURWUQXUPXUPYUOZUNZUN[UM\UL]UL]UK^UJ_UI_UI`UHaUGaUGbUFcUEdUDdUDeUCfUBfUBgUAhU@iU@iU?jU>kU=kU=lU>=<<;:9987765 5 4!3"2#2#1$0%0%/&.'-'-(,)+*+**+),),(-'.&.&/%0$1$1#2"3"3!4 5567889::;<==>??@AABCDDEFFG H H I J K K LMMNOOPQRRSTTVY\_begjmoruw~z{|x u r o l i f c `]ZWUROLJGDB?=:8530.+)'$"         %%%$$$###""! !!!# % &(*+-/12468:<=?ACEGIKMOQSUXZ\^`begiknps~ u| wz zx |v t r p o mkifdb`^\ZXUSRRRRRRRRRRRRR R R R R R R RRRRRRRRRRRRRRRRRRRRRRRRRR R R!R"R"R#R$R%R%R&R'R'R(R)R)R*R+R,R,R-R~.R}.R}/R|0R{1Rz1Rz2Ry3Rx3Rx4Rw5Rv6Rv6Ru7Rt8Rs8Rs9Rr:Rq:Rq;RpRm?Rl?Rl@RkARjBRjBRiCRhDRgDRgERfFReFReGRdHRcIRbIRbJRaKR`KR`LR_MR^NR^NR]OR\PR[PR[QRZRRYRRYSRXTRWURVURVVRUWRTWRTXRSYRRZRRZRQ[RP\RO\RO]RN^RM^RM_RL`RKaRJaRJbRIcRHcRHdRGeRFfREfREgRDhRChRCiRBjRAkRAkR@lR?mR>mR>nR=oR==<;::98876654332 1!1!0"/#.$.$-%,&,&+'*()())(*'+'+&,%-%-$.#/"0"0!1 2 23445677899:;<<=>>?@@ABCCD E E F G H H IJJKLLMNOOPQQSVY\_begjmoruw~z{ |w t q n k h e b_\ZWTQOLIFDA?<:7520-+(&$!         
&&%%%$$###"" "!!#!% & ( *+-/12468:<=?ACEGIKMOQSUXZ\^`begiknps}u{ wy zx |v t r p n l jhfdb`][YWURPOOOOOOOOOOOOO O O O O O O OOOOOOOOOOOOOOOOOOOOOOOOOO O O!O"O#O#O$O%O%O&O'O(O(O)O*O*O+O,O-O-O.O/O/O0O~1O}2O}2O|3O{4Oz4Oz5Oy6Ox7Ox7Ow8Ov9Ou9Ou:Ot;Os;OsOp>Op?Oo@On@OnAOmBOlCOkCOkDOjEOiEOiFOhGOgHOgHOfIOeJOdJOdKOcLObMObMOaNO`OO_OO_PO^QO]RO]RO\SO[TOZTOZUOYVOXVOXWOWXOVYOUYOUZOT[OS[OS\OR]OQ^OP^OP_OO`ON`ONaOMbOLcOKcOKdOJeOIeOIfOHgOGhOGhOFiOEjODjODkOClOBmOBmOAnO@oO?oO?pO>qO=qO=rO>=<;;:99877654432210/ / .!-"-",#+$*$*%)&('(''(&)%)%*$+#,#,"-!. . /01123345667889:;;<==>??@A B B C D D E FGGHIIJKLLMNNPSVY\_begjmoru w} zz |w t q m j g eb_\YVSQNKIFCA>;9641/-*(%#!     !!     &&&%%%$$$##" "!"#!%!&!( * +-/12468:<=?ACEGIKMOQSUWZ\^`begiknp~s|u{wy zw |u s q o m k i geca_][XVTROMLLLLLLLLLLLLL L L L L L L LLLLLLLLLLLLLLLLLLLLLLLLLL L!L!L"L#L$L$L%L&L&L'L(L)L)L*L+L+L,L-L.L.L/L0L0L1L2L3L3L~4L}5L|5L|6L{7Lz8Lz8Ly9Lx:Lw:Lw;LvLs?Lr?Lr@LqALpBLpBLoCLnDLmDLmELlFLkGLkGLjHLiILhILhJLgKLfLLfLLeMLdNLcNLcOLbPLaQLaQL`RL_SL^SL^TL]UL\VL\VL[WLZXLYXLYYLXZLW[LW[LV\LU]LT]LT^LS_LR`LR`LQaLPbLObLOcLNdLMeLMeLLfLKgLJgLJhLIiLHjLHjLGkLFlLElLEmLDnLCoLCoLBpLAqL@qL@rL?sL>tL>tL=uL==<;::98876554332100/..-, + +!*")#)#($'%&%&&%'$($(#)"*!*!+ ,--.//0122345567789::;<<=> ? ? @ A A B CDDEFFGHIIJKKMPSVY\_begjmor u w| zy |v s p m j gda^[XVSPMKHEC@=;8631.,*'%#     !!"     ''&&&%%%$$## #!"#"%!&!(!* + -/12468:<=?ACEGIKMOQSUWZ\^`begiknp~s|uzwxzv |t r p o m k i g db`^\ZXUSQOLJIIIIIIIIIIIII I I I I I I IIIIIIIIIIIIIIIIIIIIIIIIII I!I"I"I#I$I$I%I&I'I'I(I)I)I*I+I,I,I-I.I/I/I0I1I1I2I3I4I4I5I6I6I~7I}8I|9I|9I{:Iz;Iy;IyIw>Iv?Iu@It@ItAIsBIrCIrCIqDIpEIoFIoFInGImHImHIlIIkJIjKIjKIiLIhMIhMIgNIfOIePIePIdQIcRIbRIbSIaTI`UI`UI_VI^WI]WI]XI\YI[ZI[ZIZ[IY\IX]IX]IW^IV_IV_IU`ITaISbISbIRcIQdIQdIPeIOfINgINgIMhILiIKiIKjIJkIIlIIlIHmIGnIFnIFoIEpIDqIDqICrIBsIAtIAtI@uI?vI?vI>wI=xI > = < ; ;:99876654432110//.-,,+*)) (!'!'"&#%$$$$%#&"'"'!( ))*+,,-../01123345667889:; ; < = > > ? 
@@ABCCDEEFGHHJMPSVY\_begjmo r u w| zy |u r o l ifc`][XUROMJGEB?=:8530.+)'$"  !!!""    (''&&&%%%$$$ #!##"%"&"(!*!+ - / 12468:<=?ACEGIKMOQSUWZ\^`begiknp}r{uywwzv|tr p n l j h f d b`][YWUSPNLIGEEEEEEEEEEEEE E E E E E EEEEEEEEEEEEEEEEEEEEEEEEEE E E!E"E"E#E$E%E%E&E'E(E(E)E*E*E+E,E-E-E.E/E/E0E1E2E2E3E4E5E5E6E7E7E8E9E~:E~:E};E|Ey?Ey?Ex@EwAEvBEvBEuCEtDEtDEsEErFEqGEqGEpHEoIEoJEnJEmKElLElLEkMEjNEiOEiOEhPEgQEgQEfREeSEdTEdTEcUEbVEbWEaWE`XE_YE_YE^ZE][E\\E\\E[]EZ^EZ^EY_EX`EWaEWaEVbEUcEUdETdESeERfERfEQgEPhEOiEOiENjEMkEMkELlEKmEJnEJnEIoEHpEHqEGqEFrEEsEEsEDtECuEBvEBvEAwE@xE@xE?yE>zE={E={E<|E;}E:~E:~E9E8E8E7E6E5E5E4E3E3E2E1E0E0E/E.E-E-E,E+E+E*E)E(E(E'E&E&E%E$E#E#E"E!E E EEEEEEEEEEEEEEEEEEEEEEEEEE E E E E E E EEEEEEEEEEEEEEDDCBBA@??>== < ; : : 9 877655432210//.--,+**)(('& % %!$"#""#"$!% % &'(()**+,--.//012234556778 9 : : ; < <=>??@ABBCDDEGJMPSVY\_begjm o r u~ w{ zx |u r o lifc`]ZWTROLIGDA?<:7520-+(&$! !!!"""    (('''&&&%%$$ $!###%#&"("*!+!-!/ 1 2468:<=?ACEGIKMOQSUWZ\^`begikn~p|r{uywwzu|sqo m k i g e c a _][XVTRPMKIFDBBBBBBBBBBBB B B B B B B BBBBBBBBBBBBBBBBBBBBBBBBBB B B!B"B#B#B$B%B&B&B'B(B(B)B*B+B+B,B-B.B.B/B0B0B1B2B3B3B4B5B6B6B7B8B8B9B:B;B;BB}>B|?B{@B{@BzAByBByCBxCBwDBvEBvFBuFBtGBsHBsHBrIBqJBqKBpKBoLBnMBnNBmNBlOBkPBkPBjQBiRBiSBhSBgTBfUBfVBeVBdWBcXBcXBbYBaZBa[B`[B_\B^]B^^B]^B\_B[`B[`BZaBYbBYcBXcBWdBVeBVfBUfBTgBShBShBRiBQjBQkBPkBOlBNmBNnBMnBLoBKpBKpBJqBIrBIsBHsBGtBFuBFvBEvBDwBCxBCxBByBAzBA{B@{B?|B>}B>~B=~B<B;B;B:B9B9B8B7B6B6B5B4B3B3B2B1B1B0B/B.B.B-B,B+B+B*B)B)B(B'B&B&B%B$B#B#B"B!B!B BBBBBBBBBBBBBBBBBBBBBBBBBB B B B B B B BBBBBBBBBBBBBBA@@?>>=<;;: 9 8 8 7 6 6 54332100/..-,++*)(('&&%$## "!!! " ##$%&&'())*++,-../01123345 6 6 7 8 9 9 :;;<=>>?@AABDGJMPSVY\_begj m o r u~ wz zw |t qnkheb_\YVTQNKIFCA><9742/-*(&#!  ! !"""#"     )(('''&&&%%% $!$##%#&#("*"+"-!/!1 2 468:<=?ACEGIKMOQSUWZ\^`begikn~p|rzuxwvzt|rpom k i g e b ` ^ \ZXVSQOMJHECA???????????? ? ? ? ? ? ? ?????????????????????????? 
?!?!?"?#?$?$?%?&?'?'?(?)?)?*?+?,?,?-?.?/?/?0?1?1?2?3?4?4?5?6?7?7?8?9?:?:?;??????~@?~A?}B?|B?{C?{D?zE?yE?xF?xG?wG?vH?vI?uJ?tJ?sK?sL?rM?qM?pN?pO?oO?nP?mQ?mR?lR?kS?kT?jU?iU?hV?hW?gX?fX?eY?eZ?dZ?c[?c\?b]?a]?`^?`_?_`?^`?]a?]b?\b?[c?Zd?Ze?Ye?Xf?Xg?Wh?Vh?Ui?Uj?Tk?Sk?Rl?Rm?Qm?Pn?Oo?Op?Np?Mq?Mr?Ls?Ks?Jt?Ju?Iu?Hv?Gw?Gx?Fx?Ey?Ez?D{?C{?B|?B}?A~?@~?????>?=?=<<;::9877 6 5 4 4 3 2 110//.-,,+*))(''&%$$#"!!  !""#$$%&''()**+,--.//012 2 3 4 5 5 6 7889::;<==>?ADGJMPSVY\_beg j m o r u} wz zw |spmjgda^\YVSPNKHEC@>;8631/,*'%#   ! ! !""###"     ))((('''&&%% %!$#$$$&#(#*"+"-"/!1!2 4 6 8:<=?ACEGIKMOQSUWZ\^`begikn}p{ryuwwvzt|rpnlj h f d b ` ^ [ YWUSPNLIGEB@=<<<<<<<<<<<< < < < < < < <<<<<<<<<<<<<<<<<<<<<<<<<< <><=<=<<<;<;<:<9<8<8<7<6<5<5<4<3<2<2<1<0<0ADGJMPSVY\_be g j m o r u| wy zv|spmjgda^[XURPMJHEB@=:8530.,)'$"   !! " ""####"    *)))(('''&&& %!%#%$$&$(#*#+#-"/"1!2!4!6 8 :;=?ACEGIKMOQSUWZ\^`begik~n|pzryuwwuzs|qomkig e c a _ ] [ Y VTRPMKIFDB?=:999999999999 9 9 9 9 9 9 9999999999999999999999999 9 9!9"9#9#9$9%9%9&9'9(9(9)9*9+9+9,9-9.9.9/90919192939494959697979899999:9;9<9<9=9>9?9?9@9A9B9B9C9D9E9E9F9~G9}H9}H9|I9{J9zJ9zK9yL9xM9wM9wN9vO9uP9uP9tQ9sR9rS9rS9qT9pU9oV9oV9nW9mX9lY9lY9kZ9j[9i[9i\9h]9g^9f^9f_9e`9da9da9cb9bc9ad9ad9`e9_f9^g9^g9]h9\i9[j9[j9Zk9Yl9Xm9Xm9Wn9Vo9Uo9Up9Tq9Sr9Rr9Rs9Qt9Pu9Pu9Ov9Nw9Mx9Mx9Ly9Kz9J{9J{9I|9H}9G~9G~9F9E9D9D9C9B9A9A9@9?9?9>9=9<9<9;9:99999897969695949393929190909/9.9.9-9,9+9+9*9)9(9(9'9&9%9%9$9#9"9"9!9 99999999999999999999999999 9 9 9 9 9 9 9999999999999877654432110 / . . - , + +*)(('&%%$##"!   !!"#$$%&''()**+ , , - . / / 012234556788;>ADGJMPSVY\_b e g j m o r u| wxzu|rolifc`]ZWUROLJGDB?<:7520-+)&$!   !! 
" " "##$$#"    **)))((('''& &!%#%$%&$($*$+#-#/"1"2"4!6!8 : ;=?ACEGIKMOQSUWZ\^`begik}n|pzrxuvwtzr|pomkige c ` ^ \ Z X V TQOMJHFCA><97555555555555 5 5 5 5 5 5 5555555555555555555555555 5!5!5"5#5#5$5%5&5&5'5(5)5)5*5+5,5,5-5.5/5/505152525354555556575858595:5;5;5<5=5>5>5?5@5@5A5B5C5C5D5E5F5F5G5H5I5I5~J5}K5}L5|L5{M5zN5zO5yO5xP5wQ5wR5vR5uS5tT5tU5sU5rV5qW5qX5pX5oY5nZ5n[5m[5l\5k]5k^5j^5i_5h`5h`5ga5fb5ec5ec5dd5ce5cf5bf5ag5`h5`i5_i5^j5]k5]l5\l5[m5Zn5Zo5Yo5Xp5Wq5Wr5Vr5Us5Tt5Tu5Su5Rv5Qw5Qx5Px5Oy5Nz5N{5M{5L|5K}5K~5J~5I5H5H5G5F5F5E5D5C5C5B5A5@5@5?5>5=5=5<5;5:5:595857575655545453525151505/5.5.5-5,5+5+5*5)5(5(5'5&5&5%5$5#5#5"5!5 5 5555555555555555555555555 5 5 5 5 5 555555555555555432210//.-, , + * ) ) ('&&%$##"!!  !""#$%%&'( ( ) * + + , -../011234457;>ADGJMPSVY\_ b e g j m o r~ u{wxzu|qnkheb_]ZWTQNLIFDA><9742/-*(&#!  !!!" " ###$$%#"     +***))(((''' &!&#&$%&%($*$+$-#/#1#2"4"6!8!: ; = ?ACEGIKMOQSUWZ\^`begik}n{pyrwuvwtzr|pnljhfdb ` ^ \ Y W U S QNLJGEC@>;964222222222222 2 2 2 2 2 2 2222222222222222222222222 2!2!2"2#2$2$2%2&2'2'2(2)2*2*2+2,2-2-2.2/20202122232324252626272829292:2;2<2<2=2>2?2?2@2A2B2B2C2D2E2E2F2G2H2H2I2J2K2K2L2M2~N2}N2|O2|P2{Q2zQ2yR2yS2xT2wT2vU2vV2uW2tW2sX2sY2rZ2qZ2p[2p\2o]2n]2n^2m_2l`2k`2ka2jb2ic2hc2hd2ge2ff2ef2eg2dh2ci2bi2bj2ak2`l2_l2_m2^n2]o2\o2\p2[q2Zr2Yr2Ys2Xt2Wu2Vu2Vv2Uw2Tx2Sx2Sy2Rz2Q{2P{2P|2O}2N~2M~2M2L2K2J2J2I2H2G2G2F2E2D2D2C2B2A2A2@2?2>2>2=2<2;2;2:292828272625252423222221202/2/2.2-2,2,2+2*2)2)2(2'2&2&2%2$2#2#2"2!2 2 2222222222222222222222222 2 2 2 2 2 22222222222222100/.--,+** ) ( ' ' & % $$#"!!   !"##$ % & & ' ( ) )*+,,-.//01247;>ADGJMPSVY\ _ b e g j m o r}uzwwzt|qnkheb_\YVSQNKHFC@>;9641/,*'%#   !!""" # #$$$%%#"     ++***)))(((''!'#&$&&%(%*%+$-$/#1#2#4"6"8!:!;!= ? 
ACEGIKMOQSUWZ\^`bdgi~k|nzpyrwuuwszq|omkigeca_] [ Y W T R P NKIGDB?=;8530//////////// / / / / / / ///////////////////////// /!/"/"/#/$/%/%/&/'/(/(/)/*/+/+/,/-/././//0/1/1/2/3/4/4/5/6/7/7/8/9/:/:/;//?/@/A/A/B/C/D/D/E/F/G/G/H/I/J/J/K/L/M/M/N/O/P/P/~Q/}R/|S/|S/{T/zU/yV/yV/xW/wX/vY/vY/uZ/t[/s\/s\/r]/q^/p_/p_/o`/na/mb/mb/lc/kd/je/je/if/hg/gh/gh/fi/ej/dk/dk/cl/bm/an/an/`o/_p/^q/^q/]r/\s/[t/[t/Zu/Yv/Xw/Ww/Wx/Vy/Uz/Tz/T{/S|/R}/Q}/Q~/P/O/N/N/M/L/K/K/J/I/H/H/G/F/E/E/D/C/B/B/A/@/?/?/>/=/ADGJMPSVY \ _ b e g j m or}uzwvzs|pmjgda^[XVSPMJHEB@=;8631.,)'$"   !!""## # $$$%%%#"    ,+++***))((('!'#'$&&&(&*%+%-$/$1$2#4#6"8":";!=!? A CEGIKMOQSUWZ\^`bdgi}k|nzpxrvutwrzp|omkigeca^\Z X V T Q O M K HFDA?<:7520-++++++++++++ + + + + + + ++++++++++++++++++++++++ + +!+"+#+#+$+%+&+&+'+(+)+)+*+++,+,+-+.+/+/+0+1+2+2+3+4+5+6+6+7+8+9+9+:+;+<+<+=+>+?+?+@+A+B+B+C+D+E+E+F+G+H+H+I+J+K+K+L+M+N+O+O+P+Q+R+R+S+T+~U+}U+|V+|W+{X+zX+yY+yZ+x[+w[+v\+u]+u^+t^+s_+r`+ra+qa+pb+oc+od+nd+me+lf+lg+kh+jh+ii+ij+hk+gk+fl+fm+en+dn+co+cp+bq+aq+`r+`s+_t+^t+]u+\v+\w+[w+Zx+Yy+Yz+Xz+W{+V|+V}+U}+T~+S+S+R+Q+P+P+O+N+M+M+L+K+J+J+I+H+G+G+F+E+D+C+C+B+A+@+@+?+>+=+=+<+;+:+:+9+8+7+7+6+5+4+4+3+2+1+1+0+/+.+.+-+,+++*+*+)+(+'+'+&+%+$+$+#+"+!+!+ +++++++++++++++++++++++++ + + + + + ++++++++++++++*))('&&%$## " !       ! " ##$%&&'())*+-147;>ADGJMPSV Y \ _ b e g jmor|uywvzs|olifc`][XUROMJGDB?=:7520-+)&$!  
!!!""## # $ $%%%&%#"    ,,+++***)))((!(#'$'&&(&*&+%-%/%1$2$4#6#8#:";"=!?!A C E GIKMOQSUWY\^`bdgi}k{nypwrvutwrzp|nljhfdb`^\ZW U S Q O L J H EC@><9741/,)(((((((((((( ( ( ( ( ( ( (((((((((((((((((((((((( (!(!("(#($($(%(&('('((()(*(*(+(,(-(-(.(/(0(1(1(2(3(4(4(5(6(7(7(8(9(:(:(;(<(=(=(>(?(@(A(A(B(C(D(D(E(F(G(G(H(I(J(J(K(L(M(M(N(O(P(Q(Q(R(S(T(T(U(V(W(~W(~X(}Y(|Z({Z({[(z\(y](x](x^(w_(v`(ua(ua(tb(sc(rd(rd(qe(pf(og(ng(nh(mi(lj(kj(kk(jl(im(hm(hn(go(fp(eq(eq(dr(cs(bt(bt(au(`v(_w(^w(^x(]y(\z([z([{(Z|(Y}(X}(X~(W(V(U(U(T(S(R(R(Q(P(O(N(N(M(L(K(K(J(I(H(H(G(F(E(E(D(C(B(B(A(@(?(>(>(=(<(;(;(:(9(8(8(7(6(5(5(4(3(2(2(1(0(/(.(.(-(,(+(+(*()((((('(&(%(%($(#("("(!( ((((((((((((((((((((((((( ( ( ( ( ( (((((((((((((''&%$$#"!!                !!"#$$%&''*-147;>ADGJMP S V Y \ _ b e gjmor{uxwuzr|olifc`]ZWTQOLIGDA?<9742/-*(&# ! !!"""##$ $ $%%&&&%#"    -,,,+++***))(!(#($'&'('*&+&-%/%0%2$4$6$8#:#;"="?!A!C!E G IKMOQSUWY\^`bdg~i|kznypwruuswqzo|mkigeca_][YWU R P N L I G E B@=;8631.+)&$$$$$$$$$$$$ $ $ $ $ $ $$$$$$$$$$$$$$$$$$$$$$$$$ $!$!$"$#$$$%$%$&$'$($($)$*$+$+$,$-$.$/$/$0$1$2$2$3$4$5$5$6$7$8$8$9$:$;$<$<$=$>$?$?$@$A$B$B$C$D$E$F$F$G$H$I$I$J$K$L$L$M$N$O$O$P$Q$R$S$S$T$U$V$V$W$X$Y$Y$Z$~[$~\$}]$|]${^${_$z`$y`$xa$xb$wc$vc$ud$te$tf$sf$rg$qh$qi$pj$oj$nk$nl$mm$lm$kn$jo$jp$ip$hq$gr$gs$ft$et$du$dv$cw$bw$ax$ay$`z$_z$^{$]|$]}$\}$[~$Z$Z$Y$X$W$W$V$U$T$S$S$R$Q$P$P$O$N$M$M$L$K$J$J$I$H$G$F$F$E$D$C$C$B$A$@$@$?$>$=$<$<$;$:$9$9$8$7$6$6$5$4$3$3$2$1$0$/$/$.$-$,$,$+$*$)$)$($'$&$%$%$$$#$"$"$!$ $$$$$$$$$$$$$$$$$$$$$$$$$ $ $ $ $ $ $$$$$$$$$$$$$$#"!!              !""#$'*-147;>ADGJM P S V Y \ _ b egjmo~r{uxwtzq|nkheb_\YVTQNKIFCA>;9641/,*' % #! !!""###$$ % %%&&&'%#"    --,,,+++***))!)#($(&(('*'+&-&/&0%2%4$6$8$:#;#="?"A"C!E!G I KMOQSUWY\^`bdg}i|kznxpvrturwpzo|mkigeca_\ZXVTR O M K I F D A ?=:8530-+(%"!!!!!!!!!!!! ! ! ! ! ! !!!!!!!!!!!!!!!!!!!!!!!!! 
!!!"!"!#!$!%!&!&!'!(!)!)!*!+!,!,!-!.!/!0!0!1!2!3!3!4!5!6!6!7!8!9!:!:!;!!?!@!A!A!B!C!D!D!E!F!G!G!H!I!J!K!K!L!M!N!N!O!P!Q!Q!R!S!T!U!U!V!W!X!X!Y!Z![!\!\!]!^!~_!~_!}`!|a!{b!zb!zc!yd!xe!wf!wf!vg!uh!ti!ti!sj!rk!ql!pl!pm!on!no!mp!mp!lq!kr!js!is!it!hu!gv!fw!fw!ex!dy!cz!cz!b{!a|!`}!_}!_~!^!]!\!\![!Z!Y!Y!X!W!V!U!U!T!S!R!R!Q!P!O!O!N!M!L!K!K!J!I!H!H!G!F!E!D!D!C!B!A!A!@!?!>!>!=!ADGJ M P S V Y \ _ begjmo}rzuwwtzq|mjgda^\YVSPMKHEC@=;8631.,) ' $!"! !""##$$$% % &&&'''%#"   .---,,,+++***!)#)$(&((()'+'-'/&0&2%4%6%8$:$;#=#?#A"C"E!G!I!K M OQSUWY\^`bdg}i{kynwpurturwpzn|ljhfdb`^\ZXUSQO L J H E C A > <9742/-*'$"        !"##$%&''()**+,-../0112345567889:;;<=>??@ABBCDEFFGHIIJKLMMNOPPQRSTTUVWWXYZ[[\]^^_`ab~b}c}d|e{ezfzgyhxhwivjvkultlsmsnroqoppoqornsmsltlukvjviwixhygzfze{e|d}c}b~ba`_^^]\[[ZYXWWVUTTSRQPPONMMLKJIIHGFFEDCBBA@??>=<<;:9887655432110/..-,+**)(''&%$##"!                     #'*-147;>ADG J M P S V Y \ _begjmo}ryuvwszp|mjgda^[XURPMJGEB?=:8530. + ) &!$!""""##$$%% % & &&''('%#"   ..---,,,+++**!*#)$)&)(()(+(-'/'0&2&4&6%8%:$;$=$?#A#C"E"G"I!K!M O QSUWY\^`bd~g|izkynwpursuqwozm|kigeca_][YWURPNL I G E B @ > ; 9641.,)&$!       !!"#$$%&'(()*++,-.//012234566789::;<==>?@AABCDDEFGHHIJKKLMNOOPQRRSTUVVWXYZZ[\]]^_`aabcdde~f}g}h|h{izjykykxlwmvnvouotpsqrrrsqsptouovnvmwlxkykzjzi{h|h}g}f~eddcba``_^]]\[ZYYXWVVUTSRRQPOONMLKKJIHHGFEDDCBA@@?>==<;:9987665432210//.-,++*)(''&%$$#"!                #'*-147;>AD G J M P S V Y \_begjmo|ryuvwrzo|lifc`]ZWUROLIGDA?<:742/ - +!(!&!#"!"###$$%%% & &'''(('%#"   /...---,,,+++!*#*$*&)())(+(-(/'0'2'4&6&8%:%;%=$?$A#C#E#G"I"K!M!O Q S UWY\^`bd}g|izkxnvptrrupwozm|kigeca_]ZXVTRPMKI F D B ? = : 8 530.+(&#        !""#$%%&'())*+,--./0012344567889:;;<=>??@ABCCDEFFGHIJJKLMNNOPQQRSTUUVWXYYZ[\\]^_``abcddefgghi~j}k}k|l{mznyoyoxpwqvrurusttsurvrvqwpxoynznzm{l|k}j}j~ihggfedccba`__^]\\[ZYXXWVUTTSRQQPONMMLKJIIHGFFEDCBBA@?>>=<;;:9877654332100/.-,,+*)(('&%%$#"!!        
#'*-147;>A D G J M P S V Y\_begjm~o{rxuuwrzo|lheb_]ZWTQNLIFCA><9641 / ,!*!("%"#" ##$$$%%&& & ''((()'%#"  //...---,,,++!+#*$*&*()))+)-(/(0'2'4'6&8&:&;%=%?$A$C$E#G#I"K"M!O!Q!S U WY\^`b~d}g{iykwnuptrrupwnzl|jhfdb`^\ZXVSQOMJHFC A ? < : 7 5 20-*(%"       !"##$%&&'()**+,-../0122345567899:;<==>?@AABCDDEFGHHIJKLLMNOPPQRSTTUVWWXYZ[[\]^__`abccdeffghijjklm~n}n|o|p{qzryrxsxtwuvvuvuwtxsyryqzq{p|o}n}m~mlkjiihgfeedcbba`_^^]\[ZZYXWVVUTSSRQPOONMLKKJIHGGFEDCCBA@@?>=<<;:9887654432110/.--,+*))('&%%$#"!!                  #'*-147;> A D G J M P S VY\_begjm~o{rwutwqzn|kheb_\YVSPNKHFC@>;86 3 1 .!,!)"'"%#"# #$$%%%&&' ' '((())'%#"  0///...---,,,!+#+$+&*(*)*+)-)/(0(2(4'6'8':&;&=%?%A%C$E$G#I#K"M"O"Q!S!U W Y\^`b~d|gzixkwnupsrquowmzk|igeca_][YWUSPNLJGEC@ > ; 9 6 4 1 /,*'$!        !"#$$%&''()*++,-.//012334567789:;;<=>??@ABCCDEFGGHIJJKLMNNOPQRRSTUVVWXYZZ[\]^^_`abbcdeffghijjklmmnopq~q}r|s|t{uzuyvxwxxwyvyuzt{t|s}r}q~pponmllkjihhgfeddcba``_^]\\[ZYYXWVUUTSRQQPONMMLKJIIHGFEEDCBAA@?>==<;:9987665432210/..-,+**)('&&%$#""!                   #'*-147; > A D G J M PSVY\_begjm}ozrwutwpzm|jgda^[XVSPMJHEB@=:8 5 3!0!.!+")"&#$#"$$$%%&&&' ' ( (())))'%# "   00////...---,!,#+$+&+(*)*+*-)/)0)2(4(6'8'9';&=&?%A%C%E$G$I#K#M#O"Q"S!U!W Y \ ^`b}d{gzixkvntprrpuowmzk|i~geca_][XVTRPNKIGDB@= ; 8 6 3 1 . +)&#!                                 ! ! " # $ % % & ' ( ) ) * + , - - . / 0 1 1 2 3 4 5 5 6 7 8 9 9 : ; < = = > ? @ A A B C D E E F G H I I J K L M M N O P Q Q R S T U U V W X Y Y Z [ \ ] ] ^ _ ` a a b c d e e f g h i i j k l m m n o p q q r s t u ~u }v |w {x {y zy yz x{ w| w} v} u~ t s s r q p o o n m l k k j i h g g f e d c c b a ` _ _ ^ ] \ [ [ Z Y X W W V U T S S R Q P O O N M L K K J I H G G F E D C C B A @ ? ? > = < ; ; : 9 8 7 7 6 5 4 3 3 2 1 0 / / . - , + + * ) ( ' ' & % $ # # " !                                     
#'*-147 ; > A D G J MPSVY\_begjm|oyrvuswpzm|jfca^[XUROLJGDB?<: 7 5!2!0"-"+"(#&##$!$%%%&&''' ( (())**)'%# "   1000///...---!,#,$,&+(+)++*-*/*0)2)4(6(8(9';'=&?&A&C%E%G$I$K$M#O#Q"S"U!W!Y!\ ^ `~b}d{gyiwkuntprrpunwlzj|h~fdb`^\ZXVSQOMKHFDA?<: 8 5 3 0 - + (%#           !""#$%&&'()**+,-../012234566789:;;<=>??@ABCCDEFGGHIJKKLMNOOPQRSSTUVWXXYZ[\\]^_``abcddefghhijkllmnoppqrstuuvwxy~y}z|{{|{}z}y~xwvvutsrrqponnmlkjjihgffedcbba`_^^]\[ZYYXWVUUTSRQQPONMMLKJIIHGFEEDCBA@@?>=<<;:9887654432100/.-,,+*)(('&%$##"!        #'*-14 7 ; > A D G JMPSVY\_begjm|oyruurwozl|ifc`]ZWTQOLIFDA> < 9 7!4!1"/",#*#(#%$#$ %%&&&'''( ( )))***)'%# " ! 110000///...-!-#-$,&,(,)+++-*/*0*2)4)6)8(9(;'='?'A&C&E%G%I%K$M$O#Q#S#U"W"Y!\!^ `~ b|dzgxiwkunspqroumwkyi|h~fdb`][YWUSQNLJHECA><97 4 2 / - * ' %"         !"##$%&''()*++,-./0012344567889:;<==>?@AABCDEEFGHIIJKLMNNOPQRRSTUVVWXYZ[[\]^__`abccdefgghijkllmnoppqrsttuvwxyyz{|~}~}}~|{zzyxwvvutsrqqponmmlkjiihgfeedcba``_^]\\[ZYXXWVUTSSRQPOONMLKKJIHGGFEDCBBA@?>>=<;::987655432110/.--,+*))('&%$$#"!          #'*-1 4 7 ; > A D GJMPSVY\_begj~m{oxruurwnzk|heb_\YVTQNKHFC@ > ;!9!6!3"1".#,#)$'$%$"% %&&&''((( ) )***+*)'%# " ! 2111000///...!-#-$-&,(,),++-+/+0*2*4*6)8)9(;(=(?'A'C&E&G&I%K%M$O$Q$S#U#W"Y"\!^!`} b{ dz gxivktmrpproumwkyi|g~eca_][YVTRPNKIGEB@=;964 1 . , ) ' $ !     !"#$%%&'())*+,-../012234566789:;;<=>??@ABCDDEFGHHIJKLLMNOPQQRSTUUVWXYZZ[\]^^_`abbcdefgghijkklmnoppqrsttuvwxxyz{|}}~~}||{zyxwwvutssrqpoonmlkjjihgffedcbaa`_^]]\[ZYYXWVUTTSRQPPONMLKKJIHGGFEDCCBA@?>>=<;::987655432110/.--,+*)(('&%$$#"!    #'*- 1 4 7 ; > A DGJMPSVY\_begj~mzowrtuqwnzk|heb_\YVSPMKHEB @ =!:!8"5"3#0#.#+$)$&%$%"%&&'''(() ) ) ***++*)'% # "  " 222111000///.!.#.$-&-(-),+,-,/+0+2*4*6*8)9);)=(?(A'C'E'G&I&K%M%O%Q$S$U#W#Y"["^~"`}!b{!dy gw iuktmrpprnulwjyh|f~db`^\ZXVTQOMKIFDB?=:8530 . 
+ ) & #            !"#$%%&'())*+,-../012234566789:;;<=>??@ABCDDEFGHHIJKLLMNOPQQRSTUUVWXYZZ[\]^^_`abbcdefgghijkklmnoppqrsttuvwxxyz{|}}~~}||{zyxwwvutssrqponnmlkjjihgffedcbaa`_^]]\[ZYYXWVUTTSRQPPONMLKKJIHGGFEDCCBA@?>>=<;::987655432110/.--,+*)(('&%$$#"!         #' * - 1 4 7 ; > ADGJMPSVY\_begj}mzowrsupwmzj|gda^[XURPMJG D B ?!=!:"7"5#2#0$-$+$(%&%#&!&&''((()) * **+++,*)'% # "  " 3222111000///!.#.$.&-(-)-+,-,/,0+2+4+6*8*9*;)=)?(A(C(E'G'I&K&M&O%Q%S$U$W#Y#[#^~"`|"bz!dx!gw iu ksmqpormukwjyh|f~db`^[YWUSQOLJHFCA><:7520- * ( % "             !"#$%%&'())*+,-../012234566789:;;<=>??@ABCDDEFGHHIJKLLMNOPQQRSTUUVWXYYZ[\]^^_`abbcdefgghijkklmnoopqrsttuvwxxyz{|}}~~}||{zyxwwvutssrqponnmlkjjihgffedcbaa`_^]]\[ZYYXWVUTTSRQPPONMLKKJIHGGFEDCCBA@?>>=<;::987655432110/.--,+*)(('&%$$#"!         # ' * - 1 4 7 ; >ADGJMPSVY\_begj|myovrsuowlzi|fc`]ZWUROLI G D!A!?"<"9"7#4#2$/$-%*%(%%&#& '''(()))* * *++,,,*)'% # " !# 333222111000/!/#/$.&.(.)-+---/,0,2,4+6+8*9*;*=)?)A)C(E(G'I'K'M&O&Q%S%U$W$Y$[#^}#`{"bz"dx!gv!it kr mp pormukwiyg|e~ca_][YWTRPNLIGEC@>;9641/,*' $ "              !"#$%%&'())*+,--./012234566789:;;<=>??@ABCCDEFGHHIJKLLMNOPQQRSTUUVWXYYZ[\]^^_`abbcdefgghijkklmnoopqrsttuvwxxyz{|}}~~}||{zyxwwvutssrqponnmlkjjihgffedcbaa`_^]]\[ZYXXWVUTTSRQPPONMLKKJIHGGFEDCCBA@?>>=<;::987655432110/.--,+*)(('&%$$#"!          # ' * - 1 4 7 ;>ADGJMPSVY\_begj|mxourruowlzi|fc`]ZWTQNK I F!C!A">";#9#6#4$1$.%,%*&'&%&"' '((())*** + ++,,,,*)' % # " !# 4333222111100!0#/$/&/(.).+.--/-0,2,4,6+8+9+;*=*?*A)C)E(G(I(K'M'O&Q&S&U%W%Y$[~$^}#`{#by"dw"gu"it!kr!mp pn rlujwhyf|d~b`^\ZXVTROMKIFDB?=;8631.,)&$ !                !"#$%%&'())*+,--./012234566789:;;<=>??@ABCCDEFGHHIJKLLMNOPQQRSTUUVWXYYZ[\]^^_`abbcdefgghijkklmnoopqrsttuvwxxyz{|}~}}~||{zyxwwvutssrqponnmlkjjihgffedcbaa`_^]]\[ZYXXWVUTTSRQPPONMLKKJIHGGFEDCBBA@?>>=<;::987655432110/.--,+*)(('&%$$#"!           
# ' * - 1 47;>ADGJMPSVY\_beg~j{mxotrqunwkzh|eb_\YVSP N K!H!E!C"@"=#;#8$5$3%0%.%+&)&&'$'"'(()))*** + + ,,,--,*)' % # "  "$ 4443332221110!0#0$/&/(/).+.-./-0-2-4,6,8,9+;+=*?*A*C)E)G)I(K(M'O'Q'S&U&W%Y%[~$^|$`z$bx#dw#gu"is"kq!mo!pm rk ujwhyf|d~b`^\ZWUSQOMJHFCA?<:7530-+(&#                !"#$%%&'())*+,--./012234566789:;;<=>??@ABCCDEFGHHIJKLLMNOPQQRSTUUVWXYYZ[\]^^_`abbcdefgghijkklmnoopqrsttuvwxxyz{|~}}}|~|{zyxwwvutssrqponnmlkjjihgffedcbaa`_^]]\[ZYXXWVUTTSRQPPONMLKKJIHGGFEDCBBA@?>>=<;::987655432110/.-,,+*)(('&%$$#"!            # ' * - 147;>ADGJMPSVY\_beg~jzmwotrqumwjzg|da^[XUS P M!J!G"E"B"?#=#:$7$5%2%0&-&+&('&'#(!(())***++ + ,,,--.,*)' % #" !"$ 5444333322211!1#0$0&0(/)/+/-./.0.2-4-6-8,9,;+=+?+A*C*E*G)I)K(M(O(Q'S'U&W&Y%[}%^{%`z$bx$dv#gt#ir"kp"mo!pm!rk!ui wg ye|c~a_][YWURPNLJGEC@><9742/-*'%"              !"#$%%&'())*+,--./012234566789:;;<=>??@ABCCDEFGHHIJKLLMNOPPQRSTUUVWXYYZ[\]^^_`abbcdeffghijkklmnoopqrsttuvwxxyz{~|}||}|~{zyxwwvutssrqponnmlkjjihgffedcbaa`_^]]\[ZYXXWVUTTSRQPPONMLKKJIHGGFEDCBBA@?>>=<;::987655432110/.-,,+*)(('&%$$#"!             # ' * -147;>ADGJMPSVY\_beg}jzmvosrpumwjzg|da^[XU R O!L!J"G"D#A#?$<$9$7%4%2&/&-'*'('%(#(!)))***++, , ,---..,*)' % #" !#% 5554443332221!1#1$0&0(0)/+/-//.0.2.4-6-8-9,;,=,?+A+C*E*G*I)K)M)O(Q(S'U'W&Y~&[|&^{%`y%bw$du$ft#ir#kp#mn"pl"rj!uh!wf yd |b~`^\ZXVTRPMKIGDB@=;9641/,)'$!              !"#$$%&'())*+,--./012234566789::;<=>??@ABCCDEFGHHIJKLLMNOPPQRSTUUVWXYYZ[\]^^_`abbcdeffghijkklmnoopqrsttuvwxxyz~{}||||}{~zyxwwvutssrqponnmlkjjihgffedcbaa`_^]]\[ZYXXWVUTTSRQPPONMLKKJIHGGFEDCBBA@?>>=<;::987655432110/.-,,+*)(('&%$$#"!              # ' *-147;>ADGJMPSVY\_beg|jymvorroulwizf|c`]Z W T!Q!O!L"I"F#C#A$>$;%9%6%4&1&/','*('(%(") )***+++,, - --....,*) ' % #"  "#% 6555444433322!2#1$1&1(0)0+0-///0/2.4.6.8-9-;-=,?,A+C+E+G*I*K*M)O)Q(S(U'W'Y~'[|&^z&`x%bw%du%fs$iq$ko#mm#pl"rj"uh!wf!yd |b ~`^\ZXUSQOMKHFDA?=:8530.+)&#!             
!"#$$%&'())*+,--./012234566789::;<=>??@ABCCDEFGHHIJKLLMNOPPQRSTUUVWXYYZ[\]^^_`abbcdeffghijkklmnoopqrsttuvwxxy~z}{||||{}z~yxwwvutssrqponnmlkjjihgffedcbaa`_^]]\[ZYXXWVUTTSRQPPONMLKKJIHGGFEDCBBA@?>>=<;::987655432110/.-,,+*)(('&%$$#"!              # '*-147;>ADGJMPSVY\_beg{jxmuorroukwhze|b_\ Y V!T!Q"N"K#H#F#C$@$=%;%8&6&3'1'.'+()(')$)")**+++,,, - - -..//.,*) ' % #"  "$& 6665554443333!2#2$2&1(1)1+0-0/00/2/4/6.8.9-;-=-?,A,C,E+G+I*K*M*O)Q)S)U(W(Y}'[{'^z&`x&bv&dt%fr%ip$ko$mm#pk#ri"tg"we!yc!|a!~_ ] [YWUSPNLJHECA><:7520-+(%#             !"#$$%&'())*+,--./012234566789::;<=>??@ABCCDEFGHHIJKLLMNOPPQRSTUUVWXYYZ[\]^^_`abbcdeffghijkklmnoopqrsttuvwxx~y}z|{||{|z}y~xwwvutssrqponnmlkjjihgffedcbaa`_^]]\[ZYXXWVUTTSRQPPONMLKKJIHGGFEDCBBA@?>>=<;::987655432110/.-,,+*)(('&%$$#"!             #'*-147;>ADGJMPSVY\_be~g{jxmtoqrnukwhze|b_ \ Y!V!S"P"M#J#H$E$B%@%=%:&8&5'2'0(-(+(()&)$*!**+++,,-- - ...///.,*) ' %#" !#$& 7666655544433!3#2$2&2(1)1+1-0/0002/4/6/8.9.;.=-?-A-C,E,G+I+K+M*O*Q*S)U)W~(Y|([{'^y'`w'bu&dt&fr%ip%kn$ml$pj$rh#tf#wd"yb"|`!~^!\ Z XVTRPNKIGEB@>;9641/,*'%"            !"#$$%&'())*+,--./012234566789::;<=>??@ABCCDEFGGHIJKLLMNOPPQRSTUUVWXYYZ[\]]^_`abbcdeffghijkklmnoopqrsstuvwx~x}y|z|{{|z|y}x~wwvutssrqponnmlkjjihgffedcbaa`_^]]\[ZYXXWVUTTSRQPPONMLKKJIHGGFEDCBBA@?>>=<;::987655432110/.-,,+*)(('&%$$#"!            #'*-147;>ADGJMPSVY\_be}gzjwmtoprmujwgzd |a ^![!X"U"R"O#M#J$G$D%B%?&<&:&7'4'2(/(-)*)()%*#*!+++,,,--. . .///0/.,*) ' %#" !#%' 7776665555444!3#3$3&2(2)2+1-1/10020406/7/9/;.=.?.A-C-E,G,I,K+M+O*Q*S*U)W~)Y|([z(^x(`w'bu'ds&fq&io&km%ml%pj$rh$tf#wd#yb"|`"~^!\!Z X VSQOMKHFDB?=;8631.,)&$!                                       ! " # $ $ % & ' ( ) ) * + , - - . / 0 1 1 2 3 4 5 6 6 7 8 9 : : ; < = > ? ? @ A B C C D E F G G H I J K L L M N O P P Q R S T U U V W X Y Y Z [ \ ] ] ^ _ ` a b b c d e f f g h i j k k l m n o o p q r s s t u v w~ x} x| y| z{ {z |y |x }w ~w v u t s s r q p o n n m l k j j i h g f f e d c b a a ` _ ^ ] ] \ [ Z Y X X W V U T T S R Q P P O N M L K K J I H G G F E D C B B A @ ? 
> > = < ; : : 9 8 7 6 5 5 4 3 2 1 1 0 / . - , , + * ) ( ( ' & % $ $ # " !                                     #'*-147;>ADGJMPSVY\_be}gyjvmsoprmuiwf zc |`!]!Z"W"U#R#O$L$I$F%D%A&>&<'9'6'4(1(/),)**'*%*"+ ++,,---.. . ///00/.,* ) ' %#"  "$%' 8777766655544!4#4$3&3(3)2+2-2/101214060709/;/=.?.A.C-E-G-I,K,M+O+Q+S*U*W})Y{)[y)^x(`v(bt'dr'fp'io&km&mk%pi%rg$te$wc#ya#|_#~]"["Y!W!U S QNLJHFCA?<:7530-+(&#                                       ! " # $ $ % & ' ( ) ) * + , - - . / 0 1 1 2 3 4 5 6 6 7 8 9 : : ; < = > ? ? @ A B C C D E F G G H I J K L L M N O P P Q R S T U U V W X Y Y Z [ \ ] ] ^ _ ` a b b c d e f f g h i j k k l m n o o p q r s s t u v~ w} x| x| y{ zz {y |x |w }w ~v u t s s r q p o n n m l k j j i h g f f e d c b a a ` _ ^ ] ] \ [ Z Y X X W V U T T S R Q P P O N M L K K J I H G G F E D C B B A @ ? > > = < ; : : 9 8 7 6 5 5 4 3 2 1 1 0 / . - , , + * ) ( ( ' & % $ $ # " !                                     #'*-147;>ADGJMPSVY\_be|gyjumroorlui wf zc!|`!]"Z"W#T#Q$N$K%I%F&C&@&>';'8(6(3)1).),*)*'+$+"+,,,--... / / /0001/.,* ) ' %#"  "$&( 8887776666555!4#4$4&3(3)3+2-2.2012141607090;/=/?/A.C.E.G-I-K,M,O,Q+S+U~+W|*Y{*[y)^w)`u(bt(dr(fp'in'kl&mj&ph%rf%te%wc$ya$|_#~]#["X"V!T!R P NLIGEC@><9742/-*(%"                                      ! " # $ $ % & ' ( ) ) * + , - - . / 0 1 1 2 3 4 5 6 6 7 8 9 : : ; < = > ? ? @ A B C C D E F G G H I J K L L M N O P P Q R S T U U V W X Y Y Z [ \ ] ] ^ _ ` a b b c d e f f g h i j k k l m n o o p q r s s t u~ v} w| x| x{ yz zy {x |w |w }v ~u t s s r q p o n n m l k j j i h g f f e d c b a a ` _ ^ ] ] \ [ Z Y X X W V U T T S R Q P P O N M L K K J I H G G F E D C B B A @ ? > > = < ; : : 9 8 7 6 5 5 4 3 2 1 1 0 / . - , , + * ) ( ( ' & % $ $ # " !                                      
#'*-147;>ADGJMPSVY\_be{gxjumron rk uh!we!zb"|_"\#Y#V#S$P$N%K%H&E&B'@'=':(8(5)3)0*-*+*)+&+$,!,,---.../ / 000111/.,* ) '%#" !#%&( 9888877766655!5#5$4&4(4)3+3-3.2022241617190;0=0?/A/C/E.G.I-K-M-O,Q,S+U}+W|+Yz*[x*^w)`u)bs)dq(fo(im'kl'mj'ph&rf&td%wb%y`$|^$~\#Z#X"V"T!Q!O M KIFDB@=;8641/,)'$"                                    ! " # $ $ % & ' ( ) ) * + , - - . / 0 1 1 2 3 4 5 6 6 7 8 9 : : ; < = > > ? @ A B C C D E F G G H I J K L L M N O P P Q R S T T U V W X Y Y Z [ \ ] ] ^ _ ` a b b c d e f f g h i j j k l m n o o p q r s s t~ u} v| w| x{ xz yy zx {w |w |v }u ~t s s r q p o n n m l k j j i h g f f e d c b a a ` _ ^ ] ] \ [ Z Y X X W V U T T S R Q P P O N M L K K J I H G G F E D C B B A @ ? > > = < ; : : 9 8 7 6 5 5 4 3 2 1 1 0 / . - , , + * ) ( ( ' & % $ $ # " !                                        #'*-147;>ADGJMPSVY\_b~e{gwjtmq on rk!ug!wd"za"|^#[#X$U$S%P%M%J&G&E'B'?(<(:)7)5)2*/*-+*+(+%,#,!,--.../// 0 001111/.,* ) '%#"  !#%') 9998887777666!5#5$5&4(4)4+4-3.3032242627191;1=0?0A/C/E/G.I.K.M-O-Q,S,U},W{+Yy+[x+]v*`t*br)dp)fo(im(kk(mi'pg're&tc&wa%y_%|]$~[$Y$W#U#S"Q"O!L!J H FDA?=:8530.+)&$!                                     ! " # $ $ % & ' ( ( ) * + , - - . / 0 1 1 2 3 4 5 6 6 7 8 9 : : ; < = > > ? @ A B C C D E F G G H I J K L L M N O P P Q R S T T U V W X Y Y Z [ \ ] ] ^ _ ` a b b c d e f f g h i j j k l m n o o p q r s s~ t} u| v| w{ xz xy yx zw {w |v |u }t ~s s r q p o n n m l k j j i h g f f e d c b a a ` _ ^ ] ] \ [ Z Y X X W V U T T S R Q P P O N M L K K J I H G G F E D C B B A @ ? > > = < ; : : 9 8 7 6 5 5 4 3 2 1 1 0 / . - , , + * ) ( ( ' & % $ $ # " !                                           #'*-147;>ADGJMPSVY\_b}ezgwjs mp om!rj!ug"wd"za#|^#[$X$U%R%O&L&I'G'D'A(>(<)9)7*4*1*/+,+*,',%,"- --..///0 0 0 111221/., * ) '%#"  "$&') :999988877776!6#6$5&5(5)4+4-4.3032342627291;1=1?0A0C0E/G/I/K.M.O-Q-S~-U|,W{,Yy,[w+]u+`t*br*dp)fn)il)kj(mh(pg're'tc&wa&y_&|]%~[%Y$V$T#R#P"N"L!J!G E CA><97520-+(%#                                      ! " # $ $ % & ' ( ( ) * + , - - . 
/ 0 1 1 2 3 4 5 6 6 7 8 9 : : ; < = > > ? @ A B C C D E F G G H I J K L L M N O P P Q R S T T U V W X Y Y Z [ \ ] ] ^ _ ` a b b c d e f f g h i j j k l m n o o p q r s~ s} t| u| v{ wz xy xx yw zw {v |u |t }s ~s r q p o n n m l k j j i h g f f e d c b a a ` _ ^ ] ] \ [ Z Y X X W V U T T S R Q P P O N M L K K J I H G G F E D C B B A @ ? > > = < ; : : 9 8 7 6 5 5 4 3 2 1 1 0 / . - , , + * ) ( ( ' & % $ $ # " !                                             #'*-147;>ADGJMPSVY\_b}ey gv js!mp!ol"ri"uf#wc#z`$|]$Z%W%T%Q&N&L'I'F(C(A(>);)9*6*3+1+.+,,),'-$-"-...///00 1 1122231/., * )'%#" !"$&(* :::9999888777!6"6$6&6(5)5+5-4.4042343637292;2=1?1A1C0E0G0I/K/M.O.Q.S}-U|-Wz-Yx,[v,]u+`s+bq+do*fm*il)kj)mh(pf(rd(tb'w`'y^&|\&~Z%X%V$T$R#O#M"K"I!G!D B @>;9641/,*'%"            !"#$$%&'(()*+,--./011234566789::;<=>>?@ABCCDEFGGHIJKLLMNOPPQRSTTUVWXYYZ[\]]^_`abbcdeffghijjklmnoopqr~s}s|t|u{vzwyxxxwywzv{u|t|s}s~rqponnmlkjjihgffedcbaa`_^]]\[ZYXXWVUTTSRQPPONMLKKJIHGGFEDCBBA@?>>=<;::987655432110/.-,,+*)(('&%$$#"!                     #'*-147;>ADGJMPSVY\_b| ey gu!jr!mo"ol"ri#ue#wb$z_$|\%Y%V&T&Q'N'K'H(E(C)@)=*;*8*5+3+0,.,+,)-&-$.!..///0001 1 1222331/., * )'%#" !#%'(* ;::::99988887!7"7$6&6(6)5+5-5.5042444637393;2=2?2A1C1E0G0I0K/M/O/Q.S}.U{.Wy-Yx-[v,]t,`r,bp+do+fm*ik*ki)mg)pe)rc(ta(w_'y]'|[&~Y&W&U%S%Q$O$M#J#H"F"D!A!? = :8631.,)'$!          !"#$$%&'(()*+,--./011234556789::;<=>>?@ABCCDEFGGHIJKKLMNOPPQRSTTUVWXYYZ[\]]^_`aabcdeffghijjklmnoopq~r}s|s|t{uzvywxwwxwyvzu{t|s|s}r~qponnmlkjjihgffedcbaa`_^]]\[ZYXXWVUTTSRQPPONMLKKJIHGGFEDCBBA@?>>=<;::987655432110/.-,,+*)(('&%$$#"!                     #'*-147;>ADGJMPSVY\_~ b{!ex!gu"jq"mn#ok#rh#ue$wb$z_%|\%Y&V&S'P'M(J(H(E)B)?*=*:+7+5+2,0,--*-(-&.#.!.//000111 2 2233331/. , * )'%#"  "$%')+ ;;;::::999888!8"7$7&7(6)6+6-5.5052444647393;3=2?2A2C1E1G1I0K0M0O/Q~/S|.U{.Wy.Yw-[u-]t-`r,bp,dn+fl+ij+kh*mg*pe)rc)ta(w_(y](|['~Y'W&U&R%P%N$L$J#H#E"C"A!>!< : 7530.+(&#!           
!"#$$%&'(()*+,--./011234556789::;<=>>?@ABCCDEFGGHIJKKLMNOPPQRSTTUVWXYYZ[\]]^_`aabcdeffghijjklmnoop~q}r|s|s{tzuyvxwwwwxvyuzt{s|s|r}q~ponnmlkjjihgffedcbaa`_^]]\[ZYXXWVUTTSRQPPONMLKKJIHGGFEDCBBA@?>>=<;::987655432110/.-,,+*)(('&%$$#"!                     #'*-147;>ADGJMPSVY \ _~!bz!ew"gt"jq#mm#oj$rg$ud%wa%z^&|[&X&U'R'O(L(J)G)D*A*?*<+9+7,4,1,/-,-*.'.%."/ //001112 2 2 3334431/. , * )'%#"  "$&()+ <;;;;::::9998!8"8$7&7(7)7+6-6.6052545647494;3=3?3A2C2E2G1I1K1M0O0Q}/S|/Uz/Wx.Yv.[u.]s-`q-bo,dm,fl,ij+kh+mf*pd*rb)t`)w^)y\(|Z(~X'V'T&R&P%N%K%I$G$E#B#@">";!9!7 42/-*(%#           !"#$$%&'(()*+,--./011234556789::;<=>>?@ABCCDEFGGHIJKKLMNOPPQRSTTUVWXYYZ[\]]^_`aabcdeffghijjklmnoo~p}q|r|s{sztyuxvwwwwvxuytzs{s|r|q}p~onnmlkjjihgffedcbaa`_^]]\[ZYXXWVUTTSRQPPONMLKKJIHGGFEDCBBA@?>>=<;::987655432110/.-,,+*)(('&%$$#"!                     #'*-147;>ADGJMPSV Y \!_}!bz"ev"gs#jp#mm$oj$rg%uc%w`&z]&|Z'W'T(R(O(L)I)F*C*A+>+;+9,6,3-1-.-,.).'/$/"/00011122 2 33444431/. , *)'%#" !#%&(*, <<<;;;;:::999!9"8$8&8'7)7+7-6.6062645657594;4=4?3A3C3E2G2I2K1M1O~0Q}0S{0Uy/Wx/Yv/[t.]r.`p-bo-dm-fk,ii,kg+me+pc+ra*t_*w])y[)|Y(~W(U'S'Q'O&M&K%H%F$D$B#?#=";"8!6!4 1 /,*'$"          !"#$$%&'(()*+,--./011234556789::;<=>>?@ABCCDEFGGHIJKKLMNOPPQRSTTUVWXYYZ[\]]^_`aabcdeffghijjklmno~o}p|q|r{szsytxuwvwwvwuxtyszs{r|q|p}o~nnmlkjjihgffedcbaa`_^]]\[ZYXXWVUTTSRQPPONMLKKJIHGGFEDCBBA@?>>=<;::987655432110/.-,,+*)(('&%$$#"!                     #'*-147;>ADGJMPS V!Y!\"_|"by#ev#gs$jo$ml%oi%rf%uc&w`&z]'|Z'W(T(Q)N)K*H*F*C+@+=,;,8,5-3-0...+.)/&/$0!001112223 3 34445531/. , *)'%#"  !#%')*, =<<<<;;;;:::9!9"9$9&8'8)8+7-7.7062646657595;4=4?4A3C3E3G2I2K2M1O~1Q|1Sz0Uy0Ww0Yu/[s/]r.`p.bn.dl-fj-ii,kg,me,pc+ra+t_*w]*y[)|Y)~W)U(S(P'N'L&J&H%F%C$A$?#<#:"8"5!3!0 . +)&$!         !"#$$%&'(()*+,,-./011234556789::;<=>>?@ABBCDEFGGHIJKKLMNOPPQRSTTUVWXXYZ[\]]^_`aabcdeffghijjklmn~n}o|p|q{rzsysxtwuwvvwuwtxsyszr{q|p|o}n~nmlkjjihgffedcbaa`_^]]\[ZYXXWVUTTSRQPPONMLKKJIHGGFEDCBBA@?>>=<;::987655432110/.-,,+*)(('&%$$#"!                    
#'*-147;>ADGJM P S!V!Y"\"_|#bx#eu$gr$jo%mk%oh&re&ub'w_'z\(|Y(V(S)P)M*K*H+E+B+?,=,:-7-5-2.0.-/+/(/&0#0!011222333 4 44555531/ . , *)'%#"  "$&')+- ===<<<<;;;;::!:"9$9&9'8)8+8-8.7072746667695;5=5?4A4C4E3G3I3K2M2O}2Q|1Sz1Ux1Wv0Yu0[s/]q/`o/bm.dl.fj-ih-kf-md,pb,r`+t^+w\+yZ*|X*~V)T)R(P(N'L'I'G&E&C%@%>$<$9#7#5"2"0!-!+ (&#           !"#$$%&'(()*+,,-./011234556789::;<=>>?@ABBCDEFGGHIJKKLMNOPPQRSTTUVWXXYZ[\]]^_`aabcdeffghijjklm~n}n|o|p{qzrysxswtwuvvuwtwsxsyrzq{p|o|n}n~mlkjjihgffedcbaa`_^]]\[ZYXXWVUTTSRQPPONMLKKJIHGGFEDCBBA@?>>=<;::987655432110/.-,,+*)(('&%$$#"!                  #'*-147;>ADGJ M P!S!V"Y"\~#_{#bx$et$gq%jn%mk&oh&re'ua'w^(z[(|X)U)S*P*M*J+G+D,B,?,<-9-7.4.2.//,/*0'0%0#1 11222334 4 4 55566531/ . , *)'%#" !"$&(*+- >====<<<<;;;:!:":$:&9'9)9+8-8.8082747677696;6=5?5A5C4E4G4I3K3M~3O}2Q{2Sy1Ux1Wv1Yt0[r0]p0`o/bm/dk.fi.ig.ke-mc-pb,r`,t^,w\+yZ+|X*~U*S)Q)O)M(K(I'F'D&B&@%=%;$9$6#4#1"/"-!*!' % "           !"#$$%&'(()*+,,-./011234556789::;<=>>?@ABBCDEFGGHIJKKLMNOPPQRSTTUVWXXYZ[\]]^_`aabcdeffghijjkl~m}n|n|o{pzqyrxswswtvuuvtwswsxryqzp{o|n|n}m~lkjjihgffedcbaa`_^]]\[ZYXXWVUTTSRQPPONMLKKJIHGGFEDCBBA@?>>=<;::987655432110/.-,,+*)(('&%$$#"!                #'*-147;>AD G J!M!P"S"V#Y#\~$_z$bw%et%gp&jm&mj'og'rd(ua(w^(z[)|X)U*R*O+L+I+F,D,A->-<.9.6.4/1/./,0)0'1$1"122233344 4 555666531/ . ,*)'%#" !#%'(*,. >>>====<<<<;;!;":$:&:':)9+9-9.8082847677797;6=6?6A5C5E5G4I4K4M~3O|3Qz2Sy2Uw2Wu1Ys1[r1]p0`n0bl/dj/fi/ig.ke.mc.pa-r_-t],w[,yY+|W+~U+S*Q*O)L)J(H(F'D'A&?&=&:%8%6$3$1#.#,")!'!$ "           !"#$$%&'(()*+,,-./011234556789::;<=>>?@ABBCDEFGGHIJKKLMNOPPQRSTTUVWXXYZ[\]]^_`aabcdeffghijjk~l}m|n|n{ozpyqxrwswsvtuutvswswrxqypzo{n|n|m}l~kjjihgffedcbaa`_^]]\[ZYXXWVUTTSRQPPONMLKKJIHGGFEDCBBA@?>>=<;::987655432110/.-,,+*)(('&%$$#"!              #'*-147;>A D G!J!M"P"S#V#Y$\}$_z%bv%es&gp&jm'mi'of(rc(u`)w])zZ*|W*T*Q+N+K,I,F-C-@->.;.8/6/3/00.0+0)1&1$2!223334445 5 566677531/ . ,*)'%#"  "#%')+,. ?>>>>====<<<'<&:&7%5%3$0$.#+#)"&"#!!!            
!"##$%&'(()*+,,-./0112345567899:;<=>>?@ABBCDEFGGHIJKKLMNOOPQRSTTUVWXXYZ[\]]^_`aabcdeefghijj~k}l|m|n{nzoypxqwrwsvsuttusvswrwqxpyozn{n{m|l}k~jjihgffedcbaa`_^]]\[ZYXXWVUTTSRQPPONMLKKJIHGGFEDCBBA@?>>=<;::987655432110/.-,,+*)(('&%$$#"!            #'*-147;> A D!G"J"M#P#S$V$Y%\|%_y&bv&er'go'jl'mi(of(rb)u_)w\*zY*|V+S+Q+N,K,H-E-B.@.=.:/8/502000-1+1(1&2#2!333444555 6 667777531 / . ,*)'%#"  "$&()+-/ ???>>>>====<';'9&7&4%2%/$-$*#(#%"#" !!              !"##$%&'(()*+,,-./0112345567899:;<=>>?@ABBCDEFGGHIJKKLMNOOPQRSTTUVWXXYZ[\]]^_`aabcdeefghij~j}k|l|m{nznyoxpwqwrvsusttsusvrwqwpxoynzn{m{l|k}j~jihgffedcbaa`_^]]\[ZYXXWVUTTSRQPPONMLKKJIHGGFEDCBBA@?>>=<;::987655432110/.-,,+*)(('&%$$#"!          #'*-147 ; >!A!D"G"J#M#P$S$V%Y%\|&_x&bu'er'gn(jk(mh)oe)rb*u_*w\*zY+|V+S,P,M-J-G-E.B.?/>>>====!<"<$<&;';);+;-:.:0:2949597998;8=8?7A7C7E6G6I6J~5M|5Oz5Qy4Sw4Uu4Ws3Yr3[p2]n2`l2bj1di1fg1he0kc0ma/o_/r]/t[.wY.yW-|U-~S-Q,O,M+J+H*F*D)B)?)=(;(8'6'4&1&/%,%*$'$%#"#""!              !"##$%&'(()*+,,-./0112345567899:;<=>>?@ABBCDEFGGHIJKKLMNOOPQRSTTUVWXXYZ[\]]^_`aabcdeefghi~j}j|k|l{mznynxowpwqvrustsstsurvqwpwoxnynzm{l{k|j}j~ihgffedcbaa`_^]]\[ZYXXWVUTTSRQPPONMLKKJIHGGFEDCBBA@?>>=<;::987655432110/.-,,+*)(('&%$$#"!          #'*-14 7 ;!>!A"D"G#J#M$P%S%V&Y~&\{&_x'bt'eq(gn(jk)mg)od*ra*u^+w[+zX,|U,R,O-L-J.G.D.A/>/<090604111/2,2)2'3$3"3 44455566 6 7778887531 / .,*)'%#" !#%')*,.0 @@@@???>>>>==!="=$<&<'<);+;-;.;0:2:4:597999;8=8?8A7C7E7G7H6J}6M{6Oz5Qx5Sv4Uu4Ws4Yq3[o3]m3`l2bj2dh2ff1hd1kb0m`0o^0r\/tZ/wX.yV.|T.~R-P-N,L,J+H+E+C*A*?)<):(8(5'3'0&.&+%)%&$$$!##""!!            !"##$%&'(()*+,,-./0112345567899:;<=>>?@ABBCDEFGGHIJKKLMNOOPQRSTTUVWXXYZ[\]]^_`aabcdeefgh~i}j|j|k{lzmynxnwowpvqurtsssstruqvpwownxnymzl{k{j|j}i~hgffedcbaa`_^]]\[ZYXXWVUTTSRQPPONMLKKJIHGGFEDCBBA@?>>=<;::987655432110/.-,,+*)(('&%$$#"!            
#'*-1 4 7!;">"A#D#G$J$M%P%S&V&Y}'\z'_w(bt(ep)gm)jj*mg*od*r`+u]+wZ,zW,|T-R-O-L.I.F/C/A/>0;081613102.2+3)3&3$4!445556667 7 7888987531 / .,*)'%#"  "$&')+-/0 AA@@@@???>>>>!="=$=&='<)<+<-;.;0;2;4:5:7:99;9=9?8A8C8E7F7H~7J}6L{6Oy6Qx5Sv5Ut5Wr4Yp4[o4]m3_k3bi3dg2fe2hd1kb1m`1o^0r\0tZ/wX/yV/|T.~R.P-M-K-I,G,E+B+@*>*<)9)7)5(2(0'-'+&(&&%#%!$$#""!!            !"##$%&'(()*+,,-./0012345567899:;<=>>?@ABBCDEFFGHIJKKLMNOOPQRSTTUVWXXYZ[\\]^_`aabcdeefg~h}i|j|j{kzlymxnwnwovpuqtrsrssrtqupvownwnxmylzk{j{j|i}h~gffedcbaa`_^]]\[ZYXXWVUTTSRQPPONMLKKJIHGGFEDCBBA@?>>=<;::987655432110/.-,,+*)(('&%$$#"!              #'* - 1!4!7";">#A#D$G$J%M%P&S&V'Y}'\y(_v(bs)ep)gl*ji*mf+oc+r`,u],wZ,zW-|T-Q.N.K/H/E/C0@0=1:181522202-3+3(3&4#4!555666777 8 889998753 1 / .,*)'%#" !"$&(*+-/1 AAAA@@@@???>>!>">$=&='=)=+<-<.<0;2;4;5:7:9:;:=9?9A9C8E8F8H~7J|7Lz7Oy6Qw6Su6Us5Wr5Yp5[n4]l4_j4bi3dg3fe2hc2ka2m_1o]1r[1tY0wW0yU/|S/~Q/O.M.K-H-F,D,B,@+=+;*9*6)4)1(/(-'*'(&%&"% %$$##"" !             !"##$%&'(()*+,,-./0012345567899:;<=>>?@ABBCDEFFGHIJKKLMNOOPQRSTTUVWXXYZ[\\]^_`aabcdeef~g}h|i|j{jzkylxmwnwnvouptqsrsrrsqtpuovnwnwmxlykzj{j{i|h}g~ffedcbaa`_^]]\[ZYXXWVUTTSRQPPONMLKKJIHGGFEDCBBA@?>>=<;::987655432110/.-,,+*)(('&%$$#"!                #' * -!1!4"7#;#>$A$D%G%J&M&P'S'V(Y|(\y)_u)br*eo*gl*jh+me+ob,r_,u\-wY-zV.|S.P.M/J/H0E0B0?1=1:2724223/3-3*4(4%4#5 55667777 8 8 8999:8753 1 /.,*)'%#" !#%'(*,.01 BBAAAA@@@@???!>">$>&>'=)=+=-=.<0<2<4;5;7;9:;:=:?:A9C9E9F8H}8J{8Lz7Nx7Qv7Su6Us6Wq6Yo5[n5]l5_j4bh4df3fd3hb3k`2m^2o]2r[1tY1wW0yT0{R0~P/N/L.J.H-F-C-A,?,=+:+8*6*3)1).),()(''$'"&&%$$##" " !!           !"##$%&'(()*+,,-./0012345567899:;<=>>?@ABBCDEFFGHIJKKLMNOOPQRSTTUVWXXYZ[\\]^_`aabcdee~f}g|h{i{jzjykxlwmwnvnuotpsqsrrrqsptounvnwmwlxkyjzj{i{h|g}f~fedcbaa`_^]]\[ZYXXWVUTTSRQPPONMLKKJIHGGFEDCBBA@?>>=<;::987655432110/.-,,+*)(('&%$$#"!                  
# '!*!-"1"4#7#;$>$A%D%G&J&M'P'S(V(Y{)\x)_u*bq*en+gk+jh,me,ob,r^-u[-wX.zU.|R/P/M/J0G0D1A1?1<292734313/4,4*4'5$5"5 66677788 8 999:::8753 1 /.,*)'%#" "#%')+-.02 BBBBAAAA@@@@?!?"?$>&>'>)>+=-=.=0<2<4<5<7;9;;;=:?:A:C9E9F~9H}9J{8Ly8Nw8Qv7St7Ur7Wp6Yo6[m5]k5_i5bg4df4fd4hb3k`3m^3o\2rZ2tX1wV1yT1{R0~P0N/K/I/G.E.C-@->,<,:,7+5+3*0*.)+))(&($'!'&&%%$$# #"!!           !"##$%&'(()*+,,-./0012345567899:;<=>>?@ABBCDEFFGHIJKKLMNOOPQRSTTUVWXXYZ[\\]^_`aabcde~e}f|g{h{izjyjxkwlwmvnuntospsqrrqrpsotnunvmwlwkxjyjzi{h{g|f}e~edcbaa`_^]]\[ZYXXWVUTTSRQPPONMLKKJIHGGFEDCBBA@?>>=<;::987655432110/.-,,+*)(('&%$$#"!                      #!'!*"-"1#4#7$;%>%A&D&G'J'M(P(S)V~)Y{*\w*_t*bq+em+gj,jg,md-oa-r^.u[.wX.zU/|R/O0L0I0F1C1A2>2;293633414.4+5)5&5$6!667778889 9 9:::;:875 3 1 /.,*)'%#" "$&()+-/12 CCBBBBAAAA@@@!@"?$?&?'>)>+>->.=0=2=4<5<7<9<;;=;?;A:C:D:F~9H|9Jz9Ly8Nw8Qu8Ss7Ur7Wp7Yn6[l6]j6_i5bg5de5fc4ha4k_4m]3o[3rY2tW2wU2yS1{Q1~O0M0K0I/F/D.B.@.>-;-9,7,4+2+/*-**)()%(#( ('&&%%$ $ ##""            !"##$%&''()*+,,-./0012345567899:;<==>?@ABBCDEFFGHIJKKLMNOOPQRSSTUVWXXYZ[\\]^_`aabcd~e}e|f{g{hziyixjwkwlvmuntnsosprqqrprosntnumvlwkwjxjyizh{g{f|e}e~dcbaa`_^]]\[ZYXXWVUTTSRQPOONMLKKJIHGGFEDCBBA@?>>=<;::987655432110/.-,,+*)(('&%$$#"!                       !#!'"*#-#1$4$7%;%>&A&D'G'J(M(P)S)V}*Yz*\w+_s+bp,em,gj-jf-mc-o`.r].uZ/wW/zT/|Q0N0K1H1F2C2@2=3;383543405-5+5(6&6#6!777888999 : :::;;:875 3 1 /.,*)'%#" !#$&(*,./13 CCCCBBBBAAAA@!@"@$@&?'?)?+>->.>0>2=4=5=7<9<;<=?@ABBCDEFFGHIJKKLMNOOPQRSSTUVWXXYZ[\\]^_`aabc~d}e|e{f{gzhyixiwjwkvlumtnsnsorpqqprornsntmulvkwjwjxiyhzg{f{e|e}d~cbaa`_^]]\[ZYXXWVUTTSRQPOONMLKKJIHGGFEDCBBA@?>>=<;::987655432110/.-,,+*)(('&%$$#"!                      
!!"#"'#*#-$1$4%7&;&>'A'D(G(J)M)P*S*V}*Yy+\v+_s,bo,el-gi-jf.mc.o`.r\/uY/wV0zS0|P1N1K1H2E2B3?3=3:4745425/5-5*6(6%7#7 7888999: : : :;;;<:875 3 1/.,*)'%#" !#%')*,.023 DDCCCCBBBBAAA!A"@$@&@'@)?+?-?.>0>2>4>5=7=9=;<=/<.:.8.5-3-0,.,,+)+'*$*!))((''&& % %$$#           !"##$%&''()*+,,-./0012345567899:;<==>?@ABBCDEFFGHIJKKLMNOOPQRSSTUVWXXYZ[\\]^_`aab~c}d|e{e{fzgyhxiwiwjvkultmsnsnroqppqornrnsmtlukvjwjwixhygzf{e{e|d}c~baa`_^]]\[ZYXXWVUTTSRQPOONMLKKJIHGGFEDCBBA@?>>=<;:9987655432110/.-,,+*)(('&%$$#"!                      !!""##'$*$-%1%4&7&;'>'A(D(G)J)M*P*S+V|+Yy,\u,_r-bo-ek-gh.je.mb/o_/r\0uY0wV0zS1|P1M2J2G2D3B3?4<494754515/6,6*6'7%7"7 888999:: : ;;;<<<:875 3 1/.,*)'%#" "$&')+-/024 DDDDCCCCBBBBA!A"A$A&@'@)@+@,?.?0?2>4>5>7>9=;===?0?@ABBCDEFFGHIJKKLMNOOPQRSSTUVWXXYZ[\\]^_`aa~b}c|d{e{ezfygxhwiwivjuktlsmsnrnqoppoqnrnrmsltkujvjviwhxgyfze{e{d|c}b~aa`_^]]\[ZYXXWVUTTSRQPOONMLKKJIHGGFEDCBBA@?>>=<;:9987655432110/.-,,+*)(('&%$##"!                    !""###$'$*%-%1&4'7';(>(A)D)G*J*M*P+S+V{,Yx,\u-_q-bn.ek.gh/jd/ma/o^0r[0uX1wU1zR1|O2L2I3G3D3A4>4;595653616.6,7)7&7$8!88999:::; ; ;<<<=<:87 5 3 1/.,*)'%#" "$&(*,-/134 EEDDDDCCCCBBB!B"A$A&A'A)@+@,@.@0?2?4?5>7>9>;>==?=A=C?@ABBCDEFFGHIJJKLMNOOPQRSSTUVWXXYZ[\\]^_``~a}b|c{d{ezeyfxgwhwiviujtkslsmrnqnpoopnqnrmrlsktjujvivhwgxfyeze{d{c|b}a~a`_^]]\[ZYXXWVUTTSRQPOONMLKKJIHGGFEDCBBA@?>>=<;:9987655432110/.-,,+*)(('&%$##"!                     ! !""##$#%'%*&-&1'4'7(;(>)A)D*G*J+M+P,S~,V{-Yw-\t._q.bm.ej/gg/jd0ma0o]1rZ1uW1wT2zQ2|O3L3I3F4C4@4>5;586563607.7+7(8&8#8!999:::;;; < <<<==<:87 5 3 1/.,*)'%#"! #%'(*,.0135 EEEEDDDDCCCCB!B"B$B&A'A)A+A,@.@0@2@4?5?7?9>;>=>?>A=B~=D|=F{ ? @ A B B C D E F F G H I J J K L M N O O P Q R S S T U V W X X Y Z [ \ \ ] ^ _ `~ `} a| b{ c{ dz ey ex fw gw hv iu it js ks lr mq np no on pn qm rl rk sj tj ui vh vg wf xe ye zd {c {b |a }a ~` _ ^ ] ] \ [ Z Y X X W V U T T S R Q P O O N M L K K J I H G G F E D C B B A @ ? > > = < ; : 9 9 8 7 6 5 5 4 3 2 1 1 0 / . - , , + * ) ( ( ' & % $ # # " !                                             ! ! 
"##$$%#%'&*&-'1(4(7);)>*A*D+G+J+M,P,S}-Vz-Yv.\s._p/bm/ei0gf0jc0m`1o]1rZ2uW2wT2zQ3|N3K4H4E4B5@5=5:6765727/7-8*8(8%9#9 9:::;;;< < < <===><:87 5 31/.,*)'%# "" #%')+-.0245 FFEEEEDDDDCCC!C"B$B&B'B)A+A,A.A0@2@4@5@7?9?;?=>?>A>B}>D|=Fz=Hy=Jw2<2:171503000./+/).&.$-!-,,++**) ) ((''$!  !!!!!!!!!!!! ! ! ! ! ! !!!!!!!!!!!!!!!!!!!!!!! !!!"!#!#!$!%!&!'!'!(!)!*!+!,!,!-!.!/!0!0!1!2!3!4!4!5!6!7!8!9!9!:!;!!?!@!A!B!B!C!D!E!F!F!G!H!I!J!J!K!L!M!N!O!O!P!Q!R!S!S!T!U!V!W!X!X!Y!Z![!\!\!]!^!_~!`}!`|!a{!b{!cz!dy!ex!ew!fw!gv!hu!it!is!js!kr!lq!mp!no!nn!on!pm!ql!rk!rj!sj!ti!uh!vg!vf!we!xe!yd!zc!{b!{a!|a!}`!~_!^!]!]!\![!Z!Y!X!X!W!V!U!T!T!S!R!Q!P!O!O!N!M!L!K!K!J!I!H!G!G!F!E!D!C!B!B!A!@!?!>!>!=!*A+D+G,J,M-P-S|.Vy.Yv.\r/_o/bl0ei0ge1jb1m_1o\2rY2uV3wS3zP3|M4J4G5E5B5?6<6:6774718/8,8*9'9%9": ::;;;<<< < ===>>><:87 5 31/.,*)'%# "" $&()+-/1246 FFFFEEEEDDDDC!C"C$C&B'B)B+B,A.A0A2A4@5@7@9@;?=???A>B}>D{>Fz>Hx=Jv=Lu=Ns"?"@"A"B"B"C"D"E"F"F"G"H"I"J"J"K"L"M"N"O"O"P"Q"R"S"S"T"U"V"W"W"X"Y"Z"["\"\"]"^~"_}"`|"`{"a{"bz"cy"dx"ew"ew"fv"gu"ht"is"is"jr"kq"lp"mo"mn"nn"om"pl"qk"rj"rj"si"th"ug"vf"ve"we"xd"yc"zb"{a"{a"|`"}_"~^"]"]"\"["Z"Y"X"X"W"V"U"T"T"S"R"Q"P"O"O"N"M"L"K"K"J"I"H"G"G"F"E"D"C"B"B"A"@"?">">"="<";":"9"9"8"7"6"5"5"4"3"2"1"1"0"/"."-",","+"*")"("("'"&"%"$"#"#"""!" """"""""""""""""""""""" " " " " " " """"""""""""!              !"" # $$%%&&#'''*(-)1)4*7*;+>+A,D,G,J-M-P.S|.Vx/Yu/\r0_n0bk0eh1ge1jb2m_2o[3rX3uU3wR4zO4|M5J5G5D6A6>6<797674818.9,9)9':$:":;;;;<<<= = =>>>>><:8 7 5 31/.,*)'%#!"# $&(*,./1356 GGFFFFEEEEDDD!D"D$C&C'C)C+B,B.B0A2A4A5A7@9@;@=@??A~?B|?D{>Fy>Hw>Jv=Lt=Nr=Pp=So"?"@"A"A"B"C"D"E"F"F"G"H"I"J"J"K"L"M"N"O"O"P"Q"R"S"S"T"U"V"W"W"X"Y"Z"["\"\"]~"^}"_|"`{"`{"az"by"cx"dw"ew"ev"fu"gt"hs"is"ir"jq"kp"lo"mn"mn"nm"ol"pk"qj"rj"ri"sh"tg"uf"ve"ve"wd"xc"yb"za"{a"{`"|_"}^"~]"]"\"["Z"Y"X"X"W"V"U"T"T"S"R"Q"P"O"O"N"M"L"K"K"J"I"H"G"G"F"E"D"C"B"B"A"@"?">">"="<";":"9"9"8"7"6"5"5"4"3"2"1"1"0"/"."-",","+"*")"("("'"&"%"$"#"#"""!" """"""""""""""""""""""" " " " " " " """""""""""""!              
!"## $ $%&&''#('(*)-)1*4*7+;+>,A,D-G-J.M.P~/S{/Vx/Yt0\q0_n1bj1eg2gd2ja2m^3o[3rX4uU4wR4zO5|L5I6F6C6@7>7;788683809.9+9(:&:#:!;;;<<<=== > >>>??><:8 7 531/.,*)'%#!"# %')+,.02357 GGGGFFFFEEEEE!D"D$D&D'C)C+C,C.B0B2B4A5A7A9A;@=@?@A}@B|?Dz?Fx?Hw>Ju>Ls>Nr=Pp=Rn=Ul=Wk4<4:483533202.2,1)1'0$0"///..--, , ++**($!    ############ # # # # # ####################### #!#"#####$#%#&#'#'#(#)#*#+#+#,#-#.#/#0#0#1#2#3#4#4#5#6#7#8#9#9#:#;#<#=#=#>#?#@#A#A#B#C#D#E#F#F#G#H#I#J#J#K#L#M#N#O#O#P#Q#R#S#S#T#U#V#W#W#X#Y#Z#[#\#\~#]}#^|#_{#`{#`z#ay#bx#cw#dw#ev#eu#ft#gs#hs#ir#iq#jp#ko#ln#mn#mm#nl#ok#pj#qj#ri#rh#sg#tf#ue#ve#vd#wc#xb#ya#za#{`#{_#|^#}]#~]#\#[#Z#Y#X#X#W#V#U#T#T#S#R#Q#P#O#O#N#M#L#K#K#J#I#H#G#G#F#E#D#C#B#B#A#@#?#>#>#=#<#;#:#9#9#8#7#6#5#5#4#3#2#1#1#0#/#.#-#,#,#+#*#)#(#(#'#&#%#$#####"#!# ####################### # # # # # # #############"!               !"#$$ % %&&''(#(')**-*1+4+7,;,>-A-D-G.J.M/P~/Sz0Vw0Yt1\p1_m1bj2eg2gc3j`3m]4oZ4rW4uT5wQ5zN5|K6H6E7C7@7=8:888592909-:*:(:%;#; ;<<<===> > > >???@><:8 7 531/.,*)'% #""$ &')+-/02467 HHGGGGFFFFFEE!E"E$D&D'D)D+C,C.C0C2B4B5B7A9A;A=A?@A}@B{@Dz@Fx?Hv?Ju?Ls>Nq>Po>Rn=Ul=Wj=Yh=[f<]d<_c5;59474542303-2+2)1&1$1!00//..-- , ,++*($!  !$$$$$$$$$$$$ $ $ $ $ $ $$$$$$$$$$$$$$$$$$$$$$$ $!$"$#$#$$$%$&$'$'$($)$*$+$+$,$-$.$/$0$0$1$2$3$4$4$5$6$7$8$9$9$:$;$<$=$=$>$?$@$A$A$B$C$D$E$F$F$G$H$I$J$J$K$L$M$N$O$O$P$Q$R$S$S$T$U$V$W$W$X$Y$Z$[$\~$\}$]|$^{$_{$`z$`y$ax$bw$cw$dv$eu$et$fs$gs$hr$iq$ip$jo$kn$ln$mm$ml$nk$oj$pj$qi$rh$rg$sf$te$ue$vd$vc$wb$xa$ya$z`${_${^$|]$}]$~\$[$Z$Y$X$X$W$V$U$T$T$S$R$Q$P$O$O$N$M$L$K$K$J$I$H$G$G$F$E$D$C$B$B$A$@$?$>$>$=$<$;$:$9$9$8$7$6$5$5$4$3$2$1$1$0$/$.$-$,$,$+$*$)$($($'$&$%$$$#$#$"$!$ $$$$$$$$$$$$$$$$$$$$$$$ $ $ $ $ $ $ $$$$$$$$$$$$##"!               
!"#$$% % &''(()#)'***-+1+4,7,;->-A.D.G/J/M0P}0Sz0Vv1Ys1\p2_l2bi3ef3gc3j`4m\4oY5rV5uS5wP6zM6|K6H7E7B8?8<8:979492:/:,:*;';%;"< <<===>>> > ???@@@><:8 7 531/.,*)'% #""$ &(*,-/13468 HHHHGGGGGFFFF!E"E$E&E'D)D+D,D.C0C2C3C5B7B9B;B=A?~AA|AB{@Dy@Fw@Hv@Jt?Lr?Np?Po>Rm>Uk>Wi=Yh=[f=]d<_b%?%@%A%A%B%C%D%E%F%F%G%H%I%J%J%K%L%M%N%N%O%P%Q%R%S%S%T%U%V%W%W%X%Y%Z%[~%\}%\|%]{%^{%_z%`y%`x%aw%bw%cv%du%dt%es%fs%gr%hq%ip%io%jn%kn%lm%ml%mk%nj%oj%pi%qh%rg%rf%se%te%ud%vc%vb%wa%xa%y`%z_%z^%{]%|]%}\%~[%Z%Y%X%X%W%V%U%T%T%S%R%Q%P%O%O%N%M%L%K%K%J%I%H%G%G%F%E%D%C%B%B%A%@%?%>%>%=%<%;%:%9%9%8%7%6%5%5%4%3%2%1%1%0%/%.%-%,%,%+%*%)%(%(%'%&%%%$%#%#%"%!% %%%%%%%%%%%%%%%%%%%%%%% % % % % % % %%%%%%%%%%%%$##"!               !"#$$%& & ''(()*#*'+*+-,1,4-7-;.>.A.D/G/J0M0P|1Sy1Vv2Yr2\o2_l3bh3ee4gb4j_4m\5oY5rV6uS6wP6zM7|J7G8D8A8?9<9996:4:1:.;,;);'<$<"<===>>>>? ? ?@@@@@><: 8 7 531/.,*)'%!##"% '(*,.013578 IIHHHHHGGGGFF!F"F$E&E'E)E+D,D.D0D2C3C5C7C9B;B=B?}B@|ABzADxAFw@Hu@Js@Lr@Np?Pn?Rl?Uk>Wi>Yg>[e=]c=_a=a`&?&@&A&A&B&C&D&E&F&F&G&H&I&J&J&K&L&M&N&N&O&P&Q&R&S&S&T&U&V&W&W&X&Y&Z~&[}&\|&\{&]{&^z&_y&`x&`w&aw&bv&cu&dt&ds&es&fr&gq&hp&io&in&jn&km&ll&mk&mj&nj&oi&ph&qg&rf&re&se&td&uc&vb&va&wa&x`&y_&z^&z]&{]&|\&}[&~Z&Y&X&X&W&V&U&T&T&S&R&Q&P&O&O&N&M&L&K&K&J&I&H&G&G&F&E&D&C&B&B&A&@&?&>&>&=&<&;&:&9&9&8&7&6&5&5&4&3&2&1&1&0&/&.&-&,&,&+&*&)&(&(&'&&&%&$&#&#&"&!& &&&&&&&&&&&&&&&&&&&&&&& & & & & & & &&&&&&&&&&&&%$##"!               !"#$$%&' ' (())**#+'+*,-,1-4-7.;.>/A/D0G0J1M1P|1Sx2Vu2Yq3\n3_k4bh4ee4ga5j^5m[6oX6rU6uR7wO7zL7|I8F8D9A9>9;:8:6:3;0;.;+<)<&<#=!==>>>>??? @ @@AAA@><: 8 7531/.,*)' %!##"% ')+-.024579 IIIIHHHHHGGGG!F"F$F&F'E)E+E,E.D0D2D3D5C7C9C;C=~B?}B@{BBzBDxAFvAHuAJs@Lq@No@Pn@Rl?Uj?Wh?Yf>[e>]c>_a=a_=d]=f[8<79776563605.5+4)4&4$3!32211100 / /..-+($ !  
#&''''''''''' ' ' ' ' ' ''''''''''''''''''''''' '!'"'"'#'$'%'&'''''(')'*'+'+','-'.'/'0'0'1'2'3'4'4'5'6'7'8'8'9':';'<'='='>'?'@'A'A'B'C'D'E'F'F'G'H'I'J'J'K'L'M'N'N'O'P'Q'R'S'S'T'U'V'W'W'X'Y~'Z}'[|'\{'\{']z'^y'_x'`w'`w'av'bu'ct'ds'ds'er'fq'gp'ho'in'in'jm'kl'lk'mj'mj'ni'oh'pg'qf're're'sd'tc'ub'va'va'w`'x_'y^'z]'z]'{\'|['}Z'~Y'X'X'W'V'U'T'T'S'R'Q'P'O'O'N'M'L'K'K'J'I'H'G'G'F'E'D'C'B'B'A'@'?'>'>'='<';':'9'9'8'7'6'5'5'4'3'2'1'1'0'/'.'-',','+'*')'('('''&'%'$'#'#'"'!' ''''''''''''''''''''''' ' ' ' ' ' ' ''''''''''''&%$##"!               !"#$$%&'' ( ()**++#,',*---1.4.7/;/>/A0D0G1J1M~2P{2Sw3Vt3Yq3\m4_j4bg5ed5ga5j^6mZ6oW7rT7uQ7wN8zK8|I8F9C9@:=:::8;5;2;0<-<*<(=%=#= >>>>???@ @ @ AAAAB@><: 8 7531/.,*)' %"#$"& ()+-/124689 JJIIIIIHHHHGG!G"G$F&F'F)F+F,E.E0E2E3D5D7D9C;C=~C?|C@{BByBDwBFvBHtAJrALpANo@Pm@Rk@Ui@Wh?Yf?[d?]b>_`>a^>d\=fZ=hY=kW'?'@'A'A'B'C'D'E'F'F'G'H'I'J'J'K'L'M'N'N'O'P'Q'R'S'S'T'U'V'W'W'X~'Y}'Z|'[{'\{'\z']y'^x'_w'`w'`v'au'bt'cs'ds'dr'eq'fp'go'hn'in'im'jl'kk'lj'mj'mi'nh'og'pf'qe're'rd'sc'tb'ua'va'v`'w_'x^'y]'z]'z\'{['|Z'}Y'~X'X'W'V'U'T'T'S'R'Q'P'O'O'N'M'L'K'K'J'I'H'G'G'F'E'D'C'B'B'A'@'?'>'>'='<';':'9'9'8'7'6'5'5'4'3'2'1'1'0'/'.'-',','+'*')'('('''&'%'$'#'#'"'!' ''''''''''''''''''''''' ' ' ' ' ' ' '''''''''''''&%$##"!               !"#$$%&'(( ) )**++,#,'-*--.1.4/7/;0>0A1D1G2J2M~2Pz3Sw3Vs4Yp4\m5_j5bf5ec6g`6j]7mZ7oW7rT8uQ8wN8zK9|H9E9B:?:=;:;7;4<2"> >>???@@@ A AAABBB@>< : 8 7531/.,*)'!%##$"& (*,./13568: JJJJIIIIIHHHH!G"G$G&G'G)F+F,F.F0E2E3E5E7D9D;D=}D?|C@zCBxCDwBFuBHsBJrBLpANnAPlARk@Ui@Wg@Ye@[c?]a?_`?a^>d\>fZ>hX=kV=mT=oR(?(@(A(A(B(C(D(E(E(F(G(H(I(J(J(K(L(M(N(N(O(P(Q(R(S(S(T(U(V(W(W~(X}(Y|(Z{([{([z(\y(]x(^w(_w(`v(`u(at(bs(cs(dr(dq(ep(fo(gn(hn(im(il(jk(kj(lj(mi(mh(ng(of(pe(qe(qd(rc(sb(ta(ua(v`(v_(w^(x](y](z\(z[({Z(|Y(}X(~X(W(V(U(T(T(S(R(Q(P(O(O(N(M(L(K(K(J(I(H(G(G(F(E(D(C(B(B(A(@(?(>(>(=(<(;(:(9(9(8(7(6(5(5(4(3(2(1(1(0(/(.(-(,(,(+(*()((((('(&(%($(#(#("(!( ((((((((((((((((((((((( ( ( ( ( ( ( ((((((((((((''&%$##"!              
!"#$$%&'(() * *++,,-#-'.*.-/1/4070;1>1A1D2G2J3M}3Py4Sv4Vs4Yo5\l5_i6bf6eb6g_7j\7mY8oV8rS8uP9wM9zJ9|G:D:B:?;<;9<6<4<1=.=,=)>'>$>">???@@@AA A ABBBCB@>< : 8 7531/.,*)'!%##%"' )+,.023579: KKJJJJJIIIIHH!H"H$H&G'G)G+G,F.F0F2F3E5E7E9E;~D=}D?{D@yDBxCDvCFtCHsBJqBLoBNnBPlARjAThAWf@Ye@[c@]a@__?a]?d[?fY>hW>jU>mS=oQ=rO=tM:<::979583818.7,7)7'6$6"55444332 2 1110/+( % ! "%)))))))))))) ) ) ) ) ) ))))))))))))))))))))))) )!)")")#)$)%)&)')')()))*)+)+),)-).)/)/)0)1)2)3)4)4)5)6)7)8)8)9):);)<)=)=)>)?)@)A)A)B)C)D)E)E)F)G)H)I)J)J)K)L)M)N)N)O)P)Q)R)S)S)T)U)V)W~)W})X|)Y{)Z{)[z)[y)\x)]w)^w)_v)`u)`t)as)bs)cr)dq)dp)eo)fn)gn)hm)il)ik)jj)kj)li)mh)mg)nf)oe)pe)qd)qc)rb)sa)ta)u`)v_)v^)w])x])y\)z[)zZ){Y)|X)}X)~W)V)U)T)T)S)R)Q)P)O)O)N)M)L)K)K)J)I)H)G)G)F)E)D)C)B)B)A)@)?)>)>)=)<);):)9)9)8)7)6)5)5)4)3)2)1)1)0)/).)-),),)+)*)))()()')&)%)$)#)#)")!) ))))))))))))))))))))))) ) ) ) ) ) ) ))))))))))))(''&%$##"!           !"#$$%&'())* * ++,,--#.'.*/-/104071;1>2A2D3G3J3M|4Py4Su5Vr5Yo6\k6_h6be7eb7g_8j[8mX8oU9rR9uO9wL:zI:|G:D;A;>;;<9<6=3=0=.=+>)>&>$?!??@@@AAAA B BBCCCB@>< : 87531/.,*) '"%$#%"' )+-/024679; KKKKKJJJJIIII!I"H$H&H'H)G+G,G.G0F2F3F5F7E9E;~E=|E?{D@yDBwDDvDFtCHrCJpCLoBNmBPkBRiBThAWfAYdA[b@]`@_^@a]@d[?fY?hW?jU>mS>oQ>rO=tM=vK=yH<{F<~D;;:9:79592908-8+8)7&7$6!66554443 3 2211/+( % ! #&)*********** * * * * * *********************** *!*"*"*#*$*%*&*'*'*(*)***+*+*,*-*.*/*/*0*1*2*3*4*4*5*6*7*8*8*9*:*;*<*=*=*>*?*@*A*A*B*C*D*E*E*F*G*H*I*J*J*K*L*M*N*N*O*P*Q*R*S*S*T*U*V~*W}*W|*X{*Y{*Zz*[y*[x*\w*]w*^v*_u*`t*`s*as*br*cq*dp*do*en*fn*gm*hl*ik*ij*jj*ki*lh*mg*mf*ne*oe*pd*qc*qb*ra*sa*t`*u_*v^*v]*w]*x\*y[*zZ*zY*{X*|X*}W*~V*U*T*T*S*R*Q*P*O*O*N*M*L*K*K*J*I*H*G*G*F*E*D*C*B*B*A*@*?*>*>*=*<*;*:*9*9*8*7*6*5*5*4*3*2*1*1*0*/*.*-*,*,*+***)*(*(*'*&*%*$*#*#*"*!* *********************** * * * * * * ************)(''&%$##"!         !"#$$%&'())*+ + ,,--..#/'/*0-0114172;2>2A3D3G4J4M{5Px5Su5Vq6Yn6\k7_g7bd7ea8g^8j[9mX9oU9rR:uO:wL:zI;|F;C;@<=<;<8=5=3=0>->+>(?%?#? 
@@@AAAAB B BCCCCDB@>< : 87531/.,*) '"%$#&"( *,-/13568:; LLKKKKKJJJJJI!I"I$I%H'H)H+H,G.G0G2G3F5F7F9F;}E=|E?zE@xEBwDDuDFsDHrDJpCLnCNlCPkCRiBTgBWeBYcA[bA]`A_^@a\@dZ@fX@hV?jT?mR?oP>qN>tL>vJ=yH={F=~D+?+@+A+A+B+C+D+E+E+F+G+H+I+J+J+K+L+M+N+N+O+P+Q+R+S+S+T+U~+V}+W|+W{+X{+Yz+Zy+[x+[w+\w+]v+^u+_t+`s+`s+ar+bq+cp+do+dn+en+fm+gl+hk+ij+ij+ji+kh+lg+mf+me+ne+od+pc+qb+qa+ra+s`+t_+u^+v]+v]+w\+x[+yZ+zY+zX+{X+|W+}V+~U+T+T+S+R+Q+P+O+O+N+M+L+K+K+J+I+H+G+G+F+E+D+C+B+B+A+@+?+>+>+=+<+;+:+9+9+8+7+6+5+5+4+3+2+1+1+0+/+.+-+,+,+++*+)+(+(+'+&+%+$+#+#+"+!+ +++++++++++++++++++++++ + + + + + + ++++++++++++*)(''&%$##" !       ! "#$$%&'())*++ , ,-..//#0'0*0-1114272;3>3A4D4G4J~5M{5Pw6St6Vq7Ym7\j7_g8bd8e`9g]9jZ9mW:oT:rQ:uN;wK;zH;|E<B<@<==:=7=5>2>/>-?*?'?%@"@ @AAAABBB C CCCDDDB@> < : 87531/.,*)!'#%%#'"( *,.013578:< LLLLLKKKKJJJJ!J"I$I%I'I)H+H,H.H0H2G3G5G7G9~F;}F={F?yF@xEBvEDtEFsDHqDJoDLnDNlCPjCRhCTfCWeBYcB[aB]_A_]Aa[AdY@fW@hU@jS@mQ?oO?qM?tK>vI>yG>{E=~C=A=?<<<:<8;6;3:1:.:,9*9'9%8"8 7776655 5 44332/+( %!! $(++++++++++++ + + + + + +++++++++++++++++++++++ +!+"+"+#+$+%+&+'+'+(+)+*+++++,+-+.+/+/+0+1+2+3+4+4+5+6+7+8+8+9+:+;+<+<+=+>+?+@+A+A+B+C+D+E+E+F+G+H+I+J+J+K+L+M+N+N+O+P+Q+R+R+S+T~+U}+V|+W{+W{+Xz+Yy+Zx+[w+[w+\v+]u+^t+_s+`s+`r+aq+bp+co+dn+dn+em+fl+gk+hj+hj+ii+jh+kg+lf+me+me+nd+oc+pb+qa+qa+r`+s_+t^+u]+v]+v\+w[+xZ+yY+zX+zX+{W+|V+}U+~T+~T+S+R+Q+P+O+O+N+M+L+K+K+J+I+H+G+G+F+E+D+C+B+B+A+@+?+>+>+=+<+;+:+9+9+8+7+6+5+5+4+3+2+1+1+0+/+.+-+,+,+++*+)+(+(+'+&+%+$+#+#+"+!+ +++++++++++++++++++++++ + + + + + + +++++++++++++*)(''&%$## " !     ! " #$$%&'())*+,, - -..//0#0'1*1-2124373;3>4A4D5G5J}6Mz6Pw6Ss7Vp7Ym8\i8_f8bc9e`9g]:jY:mV:oS;rP;uM;wJ7>4>1?/?,?)@'@$@"AAABBBBCC C CDDDEDB@> < : 87531/.,* )!'#%%#'") +-.024679;< MMLLLLLKKKKKJ J"J$J%I'I)I+I,I.H0H2H3H5G7G9~G;|G={F>yF@wFBvFDtEFrEHpEJoELmDNkDPiDRhCTfCWdCYbC[`B]_B_]Ba[AdYAfWAhUAjS@mQ@oO@qM?tK?vI?yG>{D>~B>@=>=<=9<7<5<3;0;.:+:):'9$9"98877766 5 54442/+ ( %!" 
%(,,,,,,,,,,,, , , , , , ,,,,,,,,,,,,,,,,,,,,,,, ,!,",",#,$,%,&,&,',(,),*,+,+,,,-,.,/,/,0,1,2,3,4,4,5,6,7,8,8,9,:,;,<,<,=,>,?,@,A,A,B,C,D,E,E,F,G,H,I,J,J,K,L,M,N,N,O,P,Q,R,R,S~,T},U|,V{,W{,Wz,Xy,Yx,Zw,[w,[v,\u,]t,^s,_s,`r,`q,ap,bo,cn,dn,dm,el,fk,gj,hj,hi,ih,jg,kf,le,me,md,nc,ob,pa,qa,q`,r_,s^,t],u],v\,v[,wZ,xY,yX,zX,zW,{V,|U,}T,~T,~S,R,Q,P,O,O,N,M,L,K,K,J,I,H,G,G,F,E,D,C,B,B,A,@,?,>,>,=,<,;,:,9,9,8,7,6,5,5,4,3,2,1,1,0,/,.,-,,,,,+,*,),(,(,',&,%,$,#,#,",!, ,,,,,,,,,,,,,,,,,,,,,,, , , , , , , ,,,,,,,,,,,,,+*)(''&%$# # " !   ! " # $$%&'())*+,-- . .//001#1'2*2-2134374;4>5A5D6G6J}6My7Pv7Sr8Vo8Yl8\i9_e9bb:e_:g\:jY;mV;oS;rP><>9>6?3?1?.@+@)@&A$A!ABBBBCCCD D DDEEEDB@> < :87531/.,* )"'$%&#(") +-/13468:;= MMMMMLLLLLKKK K"J$J%J'J)J+I,I.I0I2H3H5H7H9}G;|G=zG>xG@wFBuFDsFFrFHpEJnELlENkEPiDRgDTeDWcCYbC[`C]^C_\BaZBdXBfVAhTAjRAmPAoN@qL@tJ@vH?yF?{D?~B>?>=>;=9=6=4<2<0;-;+;(:&:#:!99888776 6 65542/, ( %!# &)------------ - - - - - ----------------------- -!-"-"-#-$-%-&-&-'-(-)-*-+-+-,---.-/-/-0-1-2-3-4-4-5-6-7-8-8-9-:-;-<-<-=->-?-@-A-A-B-C-D-E-E-F-G-H-I-J-J-K-L-M-N-N-O-P-Q-R-R~-S}-T|-U{-V{-Wz-Wy-Xx-Yw-Zw-[v-[u-\t-]s-^s-_r-`q-`p-ao-bn-cn-dm-dl-ek-fj-gj-hi-hh-ig-jf-ke-le-md-mc-nb-oa-pa-q`-q_-r^-s]-t]-u\-v[-vZ-wY-xX-yX-zW-zV-{U-|T-}T-~S-~R-Q-P-O-O-N-M-L-K-K-J-I-H-G-G-F-E-D-C-B-B-A-@-?->->-=-<-;-:-9-9-8-7-6-5-5-4-3-2-1-1-0-/-.---,-,-+-*-)-(-(-'-&-%-$-#-#-"-!- ----------------------- - - - - - - ------------,,+*)(''&%$ # # " !  ! " # $ $%&'())*+,--. . //0011#2'2*3-3144475;5>5A6D6G7J|7My7Pu8Sr8Vn9Yk9\h9_e:ba:e^;g[;jX;mU@>>>;?8?5?3@0@-@+A(A&A#B BBBCCCDD D DEEEEFDB@> < :87531/.,*!)#'$%&#("* ,./13578:<= NNMMMMMLLLLLK K"K$K%K'J)J+J,J.I0I2I3I5H7~H9}H;{H=yG>xG@vGBtGDsFFqFHoFJnFLlENjEPhERgETeDWcDYaD[_D]]C_[CaYCdXBfVBhTBjRAmPAoNAqLAtI@vG@yE@{C?~A???=>:>8>6=4=1=/<,<*;(;%;#: ::998887 766552/, (%! # '*-........... . . . . . ....................... 
.!.".".#.$.%.&.&.'.(.).*.+.+.,.-..././.0.1.2.3.4.4.5.6.7.8.8.9.:.;.<.<.=.>.?.@.A.A.B.C.D.E.E.F.G.H.I.J.J.K.L.M.N.N.O.P.Q.R~.R}.S|.T{.U{.Vz.Wy.Wx.Xw.Yw.Zv.[u.[t.\s.]s.^r._q.`p.`o.an.bn.cm.dl.dk.ej.fj.gi.hh.hg.if.je.ke.ld.mc.mb.na.oa.p`.q_.q^.r].s].t\.u[.vZ.vY.wX.xX.yW.zV.zU.{T.|T.}S.~R.~Q.P.O.O.N.M.L.K.K.J.I.H.G.G.F.E.D.C.B.B.A.@.?.>.>.=.<.;.:.9.9.8.7.6.5.5.4.3.2.1.1.0./...-.,.,.+.*.).(.(.'.&.%.$.#.#.".!. ....................... . . . . . . ............-,,+*)(''&% $ # # " !   ! " # $ $ %&'())*+,-../ / 001122#3'3*4-4144575;6>6A7D7G7J{8Mx8Pt9Sq9Vn9Yj:\g:_d;ba;e^;gZzF>|C>@?=?:?7@5@2@/A-A*A'B%B"B BCCCDDDD E EEFFFFDB@ > < :87531/.,*!)#'%%'#)"+ ,.024579;<> NNNNNMMMMMLLL L"L$K%K'K)K+J,J.J0J2J3I5I7~I9|I;zH=yH>wH@vHBtGDrGFpGHoGJmFLkFNjFPhERfETdEWbEY`D[_D]]D_[DaYCdWCfUChSBjQBmOBoMAqKAtIAvGAyE@{B@~@@>?5>3>1=.=,=)<'<%;"; ;:::998 8 777652/, (%!!$ '+./////////// / / / / / /////////////////////// /!/"/"/#/$/%/&/&/'/(/)/*/+/+/,/-/./////0/1/2/3/3/4/5/6/7/8/8/9/:/;//?/@/A/A/B/C/D/E/E/F/G/H/I/I/J/K/L/M/N/N/O/P/Q~/R}/R|/S{/T{/Uz/Vy/Wx/Ww/Xw/Yv/Zu/[t/[s/\s/]r/^q/_p/_o/`n/an/bm/cl/dk/dj/ej/fi/gh/hg/hf/ie/je/kd/lc/mb/ma/na/o`/p_/q^/q]/r]/s\/t[/uZ/uY/vX/wX/xW/yV/zU/zT/{T/|S/}R/~Q/~P/O/O/N/M/L/K/K/J/I/H/G/G/F/E/D/C/B/B/A/@/?/>/>/=/7A7D8G~8J{9Mw9Pt9Sp:Vm:Yj:\f;_c;b`uK>wH>zE?|B???<@:@7@4A1A/A,B)B'B$B"CCCDDDDEE E FFFFGFDB@ > <:87531/., *")$'%%'#)"+ -/124689;=> OONNNNNNMMMML L"L$L%L'K)K+K,K.K0J2J3J5J7}I9|I;zI=xI>wH@uHBsHDrHFpGHnGJlGLkGNiFPgFReFTdFWbEY`E[^E]\D_ZDaXDdVDfTChRCjPCmNBoLBqJBtHAvFAyDA{BA~@@=@;@9?7?4?2>0>.>+=)=&<$0?0@0A0A0B0C0D0E0E0F0G0H0I0I0J0K0L0M0N0N0O0P~0Q}0R|0R{0S{0Tz0Uy0Vx0Ww0Ww0Xv0Yu0Zt0[s0[s0\r0]q0^p0_o0_n0`n0am0bl0ck0dj0dj0ei0fh0gg0hf0he0ie0jd0kc0lb0ma0ma0n`0o_0p^0q]0q]0r\0s[0tZ0uY0uX0vX0wW0xV0yU0zT0zT0{S0|R0}Q0~P0~O0O0N0M0L0K0K0J0I0H0G0G0F0E0D0C0B0B0A0@0?0>0>0=0<0;0:090908070605050403020101000/0.0-0,0,0+0*0)0(0(0'0&0%0$0#0#0"0!0 00000000000000000000000 0 0 0 0 0 0 000000000000/.-,,+*)('' & % $ # # "!   !" 
# $ $ % & '())*+,-../00 1 122334#4'5*5-6164677;7>8A8D8G}9Jz9Mv:Ps:Sp:Vl;Yi;\f<_coP>rM>uJ?wG?zD?|A@>@<@9A6A3A1B.B+B)C&C$C!CDDDEEEEF F FFGGGFDB@ > <:87531/., *")$'&%(#*", -/13568:<=? OOOOONNNNNMMM M"M$L%L'L)L+L,K.K0K2K3J5~J7}J9{J;yI=xI>vI@tIBsHDqHFoHHnHJlGLjGNhGPgGReFTcFVaFY_F[]E]\E_ZEaXEdVDfTDhRDjPCmNCoLCqJBtHBvEByCB{AA~?A=A;@8@6@4?2?/?->*>(>&=#=!<<<;;;:: 9 998862/ , (%!"& ),000000000000 0 0 0 0 0 00000000000000000000000 0!0"0"0#0$0%0&0&0'0(0)0*0+0+0,0-0.0/0/0001020303040506070808090:0;0<0<0=0>0?0@0A0A0B0C0D0E0E0F0G0H0I0I0J0K0L0M0N0N0O~0P}0Q|0R{0R{0Sz0Ty0Ux0Vw0Ww0Wv0Xu0Yt0Zs0[s0[r0\q0]p0^o0_n0_n0`m0al0bk0cj0dj0di0eh0fg0gf0he0he0id0jc0kb0la0ma0m`0n_0o^0p]0q]0q\0r[0sZ0tY0uX0uX0vW0wV0xU0yT0zT0zS0{R0|Q0}P0~O0~O0N0M0L0K0K0J0I0H0G0G0F0E0D0C0B0B0A0@0?0>0>0=0<0;0:090908070605050403020101000/0.0-0,0,0+0*0)0(0(0'0&0%0$0#0#0"0!0 00000000000000000000000 0 0 0 0 0 0 0000000000000/.-,,+*)(' ' & % $ # #"!   !"# $ $ % & ' ())*+,-../011 2 233444#5'5*6-6174778;8>8A9D9G}:Jy:Mv:Pr;So;VljU>mR>oO?rL?uI?wF@zD@|A@>A;A8A6B3B0B-C+C(C&C#D DDEEEEFF F GGGGHHFDB@ > <:87531/.,!*#)%''%(#*", .023579:<>? PPPOOOOONNNNN M"M$M%M'L)L+L,L.L0K2K3K5~K7|J9zJ;yJ=wJ>vJ@tIBrIDpIFoIHmHJkHLjHNhHPfGRdGTbGVaFY_F[]F][F_YEaWEdUEfSEhQDjODmMDoKCqICtGCvEByCB{AB~>B%>"> ==<<<;;; ::99862/ ,(%! #' *-111111111111 1 1 1 1 1 11111111111111111111111 1!1"1"1#1$1%1&1&1'1(1)1*1+1+1,1-1.1/1/1011121313141516171818191:1;1<1<1=1>1?1@1A1A1B1C1D1E1E1F1G1H1I1I1J1K1L1M1N1N~1O}1P|1Q{1R{1Rz1Sy1Tx1Uw1Vw1Wv1Wu1Xt1Ys1Zs1[r1[q1\p1]o1^n1_n1_m1`l1ak1bj1cj1di1dh1eg1ff1ge1he1hd1ic1jb1ka1la1m`1m_1n^1o]1p]1q\1q[1rZ1sY1tX1uX1uW1vV1wU1xT1yT1zS1zR1{Q1|P1}O1~O1~N1M1L1K1K1J1I1H1G1G1F1E1D1C1B1B1A1@1?1>1>1=1<1;1:191918171615151413121111101/1.1-1,1,1+1*1)1(1(1'1&1%1$1#1#1"1!1 11111111111111111111111 1 1 1 1 1 1 11111111111110/.-,,+*)( ' ' & % $ ##"!   
!"#$ $ % & ' ( ))*+,-../0112 2 334455#6'6*7-7174878;9>9A:D:G|:Jx;Mu;Pr;Sne[>gX>jU?mR?oO?rL@uI@wF@zCA|@A=A:B8B5B2C/C-C*C(D%D"D EEEEFFFG G GGHHHHFDB @ > <:87531/.,!*#)%''%)#+"- .024679;=>@ PPPPPOOOOONNN N"N$M%M'M)M+M,L.L0L2L3K5}K7|K9zK;xK=wJ>uJ@sJBrJDpIFnIHlIJkILiHNgHPeHRdHTbGV`GY^G[\G]ZF_XFaVFdUEfSEhQEjOEmMDoJDqHDtFCvDCyBC{@C~>B>===<<< ; ;:::962/ ,(%!!$' +.122222222222 2 2 2 2 2 22222222222222222222222 2!2"2"2#2$2%2&2&2'2(2)2*2*2+2,2-2.2/2/2021222323242526272828292:2;2<2<2=2>2?2@2@2A2B2C2D2E2E2F2G2H2I2I2J2K2L2M2N~2N}2O|2P{2Q{2Rz2Ry2Sx2Tw2Uw2Vv2Vu2Wt2Xs2Ys2Zr2[q2[p2\o2]n2^n2_m2_l2`k2aj2bj2ci2dh2dg2ef2fe2ge2hd2hc2ib2ja2ka2l`2l_2m^2n]2o]2p\2q[2qZ2rY2sX2tX2uW2uV2vU2wT2xT2yS2zR2zQ2{P2|O2}O2~N2~M2L2K2K2J2I2H2G2G2F2E2D2C2B2B2A2@2?2>2>2=2<2;2:292928272625252423222121202/2.2-2,2,2+2*2)2(2(2'2&2%2$2#2#2"2!2 22222222222222222222222 2 2 2 2 2 2 222222222222110/.-,,+*) ( ' ' & % $##"!   !"#$$ % & ' ( ) )*+,-../01223 3 445566#7'7*7-8184979;9>:A:D;G{;Jx;Mt_`>b]>eZ?gW?jT?mQ@oN@rK@uHAwEAzBA|?B=B:B7C4C2C/D,D*D'D$E"EEFFFFGGG G HHHHIHFDB @ ><:87531/. ,"*$)&'(%)#+"- /13468:;=?@ QQQPPPPPOOOOO N"N$N%N'N)M+M,M.M0L2L3~L5}L7{L9yK;xK=vK>tK@sJBqJDoJFnJHlIJjILhINgIPeHRcHTaHV_HY]G[\G]ZG_XGaVFdTFfRFhPFjNEmLEoJEqHDtFDvDDyAC{?C~=C;C9B6B4B2A0A-A+@(@&@$?!??>>===< < <;;:962 / ,(%""%( +/233333333333 3 3 3 3 3 33333333333333333333333 3!3"3"3#3$3%3&3&3'3(3)3*3*3+3,3-3.3/3/3031323333343536373838393:3;3<3<3=3>3?3@3@3A3B3C3D3E3E3F3G3H3I3I3J3K3L3M~3N}3N|3O{3P{3Qz3Ry3Rx3Sw3Tw3Uv3Vu3Vt3Ws3Xs3Yr3Zq3[p3[o3\n3]n3^m3_l3_k3`j3aj3bi3ch3dg3df3ee3fe3gd3hc3hb3ia3ja3k`3l_3l^3m]3n]3o\3p[3qZ3qY3rX3sX3tW3uV3uU3vT3wT3xS3yR3zQ3zP3{O3|O3}N3~M3~L3K3K3J3I3H3G3G3F3E3D3C3B3B3A3@3?3>3>3=3<3;3:393938373635353433323131303/3.3-3,3,3+3*3)3(3(3'3&3%3$3#3#3"3!3 33333333333333333333333 3 3 3 3 3 3 3333333333332110/.-,,+* ) ( ' ' & %$##"!   !"#$$% & ' ( ) ) *+,-../012234 4 555667#7'8*8-919497:;:>;A;D~;GzYf>\c>_`?b]?eY?gV@jS@mP@oMArJAuGAwDBzBB|?B<:87531/.!,"*$)&'(%*#,". 
013578:<>?A QQQQQPPPPPPOO O"O$O%N'N)N*N,M.M0M2M3~M5|L7zL9yL;wL=vK>tK@rKBpKDoKFmJHkJJjJLhJNfIPdIRbITaIV_HY]H[[H]YH_WGaUGdSGfQFhOFjMFmKFoIEqGEtEEvCDyAD{?D~>== = <<<;962 / ,(%""&) ,/344444444444 4 4 4 4 4 44444444444444444444444 4!4"4"4#4$4%4&4&4'4(4)4*4*4+4,4-4.4/4/4041424343444546474848494:4;4<4<4=4>4?4@4@4A4B4C4D4E4E4F4G4H4I4I4J4K4L~4M}4N|4N{4O{4Pz4Qy4Rx4Rw4Sw4Tv4Uu4Vt4Vs4Ws4Xr4Yq4Zp4[o4[n4\n4]m4^l4_k4_j4`j4ai4bh4cg4df4de4ee4fd4gc4hb4ha4ia4j`4k_4l^4l]4m]4n\4o[4pZ4qY4qX4rX4sW4tV4uU4uT4vT4wS4xR4yQ4zP4zO4{O4|N4}M4~L4~K4K4J4I4H4G4G4F4E4D4C4B4B4A4@4?4>4>4=4<4;4:494948474645454443424141404/4.4-4,4,4+4*4)4(4(4'4&4%4$4#4#4"4!4 44444444444444444444444 4 4 4 4 4 4 44444444444432110/.-,,+ * ) ( ' ' &%$##"!   !"#$$%& ' ( ) ) * +,-../0122344 5 566778#8'8*9-91:4:7;;;>;ASl>Vi>Ye?\b?__?b\@eY@gV@jSAmPAoMArJBuGBwDBzAC|>C;C8D6D3D0E.E+E(E&F#F!FGGGGHHH H IIIIJJHFD B @ ><:87531/.!,#*%)'')%+#,". 024579;<>@A RRRQQQQQPPPPP O"O$O%O'O)N*N,N.N0N2M3}M5{M7zM9xL;wL