pax_global_header00006660000000000000000000000064145026340450014515gustar00rootroot0000000000000052 comment=3e33b872645e8bf929a4e4620a52378879bb9263 rocThrust-rocm-5.7.1/000077500000000000000000000000001450263404500144625ustar00rootroot00000000000000rocThrust-rocm-5.7.1/.clang-format000066400000000000000000000063511450263404500170420ustar00rootroot00000000000000# Style file for MLSE Libraries based on the modified rocBLAS style # Common settings BasedOnStyle: WebKit TabWidth: 4 IndentWidth: 4 UseTab: Never ColumnLimit: 100 # Other languages JavaScript, Proto --- Language: Cpp # http://releases.llvm.org/6.0.1/tools/clang/docs/ClangFormatStyleOptions.html#disabling-formatting-on-a-piece-of-code # int formatted_code; # // clang-format off # void unformatted_code ; # // clang-format on # void formatted_code_again; DisableFormat: false Standard: Cpp11 AccessModifierOffset: -4 AlignAfterOpenBracket: true AlignConsecutiveAssignments: true AlignConsecutiveDeclarations: true AlignEscapedNewlinesLeft: true AlignOperands: true AlignTrailingComments: false AllowAllParametersOfDeclarationOnNextLine: true AllowShortBlocksOnASingleLine: false AllowShortCaseLabelsOnASingleLine: false AllowShortFunctionsOnASingleLine: Empty AllowShortIfStatementsOnASingleLine: false AllowShortLoopsOnASingleLine: false AlwaysBreakAfterDefinitionReturnType: false AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: false AlwaysBreakTemplateDeclarations: true BinPackArguments: false BinPackParameters: false # Configure each individual brace in BraceWrapping BreakBeforeBraces: Custom # Control of individual brace wrapping cases BraceWrapping: { AfterClass: 'true' AfterControlStatement: 'true' AfterEnum : 'true' AfterFunction : 'true' AfterNamespace : 'true' AfterStruct : 'true' AfterUnion : 'true' BeforeCatch : 'true' BeforeElse : 'true' IndentBraces : 'false' # AfterExternBlock : 'true' } #BreakAfterJavaFieldAnnotations: true #BreakBeforeInheritanceComma: false 
#BreakBeforeBinaryOperators: None #BreakBeforeTernaryOperators: true #BreakConstructorInitializersBeforeComma: true #BreakStringLiterals: true CommentPragmas: '^ IWYU pragma:' #CompactNamespaces: false ConstructorInitializerAllOnOneLineOrOnePerLine: false ConstructorInitializerIndentWidth: 4 ContinuationIndentWidth: 4 Cpp11BracedListStyle: true #SpaceBeforeCpp11BracedList: false DerivePointerAlignment: false ExperimentalAutoDetectBinPacking: false ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] IndentCaseLabels: false #FixNamespaceComments: true IndentWrappedFunctionNames: false KeepEmptyLinesAtTheStartOfBlocks: true MacroBlockBegin: '' MacroBlockEnd: '' #JavaScriptQuotes: Double MaxEmptyLinesToKeep: 1 NamespaceIndentation: Inner ObjCBlockIndentWidth: 4 #ObjCSpaceAfterProperty: true #ObjCSpaceBeforeProtocolList: true PenaltyBreakBeforeFirstCallParameter: 19 PenaltyBreakComment: 300 PenaltyBreakFirstLessLess: 120 PenaltyBreakString: 1000 PenaltyExcessCharacter: 1000000 PenaltyReturnTypeOnItsOwnLine: 60 PointerAlignment: Left SpaceAfterCStyleCast: false SpaceBeforeAssignmentOperators: true SpaceBeforeParens: Never SpaceInEmptyParentheses: false SpacesBeforeTrailingComments: 1 SpacesInAngles: false SpacesInContainerLiterals: true SpacesInCStyleCastParentheses: false SpacesInParentheses: false SpacesInSquareBrackets: false #SpaceAfterTemplateKeyword: true #SpaceBeforeInheritanceColon: true #SortUsingDeclarations: true SortIncludes: true # Comments are for developers, they should arrange them ReflowComments: false #IncludeBlocks: Preserve #IndentPPDirectives: AfterHash --- rocThrust-rocm-5.7.1/.git-blame-ignore-revs000066400000000000000000000006721450263404500205670ustar00rootroot00000000000000# Exclude these commits from git-blame and similar tools. 
# # To use this file, run the following command from the repo root: # # ``` # $ git config blame.ignoreRevsFile .git-blame-ignore-revs # ``` # # Include a brief comment with each commit added, for example: # # ``` # d92d9f8baac5ec48a8f8718dd69f415a45efe372 # Initial clang-format # ``` # # Only add commits that are pure formatting changes (e.g. # clang-format version changes, etc). rocThrust-rocm-5.7.1/.gitattributes000066400000000000000000000001531450263404500173540ustar00rootroot00000000000000*.pdf binary *.doc binary *.docx binary *.ppt binary *.pptx binary *.xls binary *.xlsx binary *.xps binary rocThrust-rocm-5.7.1/.githooks/000077500000000000000000000000001450263404500163675ustar00rootroot00000000000000rocThrust-rocm-5.7.1/.githooks/install000077500000000000000000000002121450263404500177560ustar00rootroot00000000000000#!/bin/sh cd "$(git rev-parse --git-dir)" cd hooks echo "Installing hooks..." ln -s ../../.githooks/pre-commit pre-commit echo "Done!" rocThrust-rocm-5.7.1/.githooks/pre-commit000077500000000000000000000012261450263404500203720ustar00rootroot00000000000000#!/bin/sh # Redirect output to stderr. exec 1>&2 check_failed=false # Do the copyright check # update & apply copyright when hook config is set, otherwise just verify opts="-qc" if [ "$(git config --get --type bool --default false hooks.updateCopyright)" = "true" ]; then opts="-qca" fi if ! "$(git rev-parse --show-toplevel)/scripts/copyright-date/check-copyright.sh" "$opts" 1>&2; then printf "\n\033[31mFailed\033[0m: copyright date check.\n" check_failed=true fi if $check_failed; then printf " Pre-commit check failed, please fix the reported errors. 
Note: Use '\033[33mgit commit --no-verify\033[0m' to bypass checks.\n" exit 1 fi rocThrust-rocm-5.7.1/.github/000077500000000000000000000000001450263404500160225ustar00rootroot00000000000000rocThrust-rocm-5.7.1/.github/dependabot.yml000066400000000000000000000010421450263404500206470ustar00rootroot00000000000000# To get started with Dependabot version updates, you'll need to specify which # package ecosystems to update and where the package manifests are located. # Please see the documentation for all configuration options: # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates version: 2 updates: - package-ecosystem: "pip" # See documentation for possible values directory: "/docs/.sphinx" # Location of package manifests open-pull-requests-limit: 10 schedule: interval: "daily" rocThrust-rocm-5.7.1/.github/workflows/000077500000000000000000000000001450263404500200575ustar00rootroot00000000000000rocThrust-rocm-5.7.1/.github/workflows/docs.yaml000066400000000000000000000045551450263404500217040ustar00rootroot00000000000000name: Upload to the upload server # Controls when the workflow will run on: push: branches: [develop, master] tags: - rocm-5.* release: types: [published] # Allows you to run this workflow manually from the Actions tab workflow_dispatch: # A workflow run is made up of one or more jobs that can run sequentially or in parallel jobs: # This workflow contains a single job called "build" build: # The type of runner that the job will run on runs-on: ubuntu-latest # Steps represent a sequence of tasks that will be executed as part of the job steps: # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - uses: actions/checkout@v2 - name: getting branch name shell: bash run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})" id: branch_name - name: getting tag name shell: bash run: echo "##[set-output name=tag;]$(echo ${GITHUB_REF_NAME})" id: tag_name - name: zipping 
files run: zip -r ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip . -x '*.git*' '*.idea*' - name: echo-step run: echo "${{ github.event.release.target_commitish }}" - name: uploading archive to prod if: ${{ steps.branch_name.outputs.branch == 'master' || github.event.release.target_commitish == 'master'}} uses: wlixcc/SFTP-Deploy-Action@v1.0 with: username: ${{ secrets.USERNAME }} server: ${{ secrets.SERVER }} ssh_private_key: ${{ secrets.SSH_PRIVATE_KEY }} local_path: ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip remote_path: '${{ secrets.PROD_UPLOAD_URL }}' args: '-o ConnectTimeout=5' - name: uploading archive to staging if: ${{ steps.branch_name.outputs.branch == 'develop' || github.event.release.target_commitish == 'develop' }} uses: wlixcc/SFTP-Deploy-Action@v1.0 with: username: ${{ secrets.USERNAME }} server: ${{ secrets.SERVER }} ssh_private_key: ${{ secrets.SSH_PRIVATE_KEY }} local_path: ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip remote_path: '${{ secrets.STG_UPLOAD_URL }}' args: '-o ConnectTimeout=5' rocThrust-rocm-5.7.1/.gitignore000066400000000000000000000013451450263404500164550ustar00rootroot00000000000000### Build dirs ### build/ ### Docs dirs ### doc/html/ doc/xml/ doc/latex/ doc/*.tag # Created by https://www.gitignore.io/api/c++,cmake ### C++ ### # Prerequisites *.d # Compiled Object files *.slo *.lo *.o *.obj # Precompiled Headers *.gch *.pch # Compiled Dynamic libraries *.so *.dylib *.dll # Fortran module files *.mod *.smod # Compiled Static libraries *.lai *.la *.a *.lib # Executables *.exe *.out *.app ### CMake ### CMakeCache.txt CMakeFiles CMakeScripts Makefile cmake_install.cmake install_manifest.txt compile_commands.json CTestTestfile.cmake thrust/system/cuda/detail/.gitignore *.bash run build* discrete_voronoi.pgm # End of https://www.gitignore.io/api/c++,cmake .vscode docs/_build/doctrees/environment.pickle 
rocThrust-rocm-5.7.1/.gitlab-ci.yml000066400000000000000000000121271450263404500171210ustar00rootroot00000000000000# ######################################################################## # Copyright 2019-2023 Advanced Micro Devices, Inc. # ######################################################################## include: - project: 'amd/ci-templates' ref: main file: - /defaults.yaml - /deps-cmake.yaml - /deps-docs.yaml - /deps-rocm.yaml - /gpus-rocm.yaml - /rules.yaml stages: - lint - build # Tests if builds succeed (CMake) - test # Tests if unit tests are passing (CTest) variables: # Helper variables PACKAGE_DIR: $BUILD_DIR/package ROCPRIM_GIT_BRANCH: develop_stream copyright-date: extends: - .deps:rocm stage: lint needs: [] tags: - rocm-build rules: - if: '$CI_PIPELINE_SOURCE == "merge_request_event"' script: - cd $CI_PROJECT_DIR - git config --global --add safe.directory $CI_PROJECT_DIR - scripts/copyright-date/check-copyright.sh -v -d $CI_MERGE_REQUEST_DIFF_BASE_SHA .cmake-latest: extends: - .deps:rocm - .deps:cmake-latest before_script: - !reference [".deps:rocm", before_script] - !reference [".deps:cmake-latest", before_script] .cmake-minimum: extends: - .deps:rocm - .deps:cmake-minimum before_script: - !reference [".deps:rocm", before_script] - !reference [".deps:cmake-minimum", before_script] .install-rocprim: script: - branch_name="$ROCPRIM_GIT_BRANCH" - if [ $CI_COMMIT_BRANCH == develop ] || [ $CI_COMMIT_BRANCH == master ]; then branch_name=$CI_COMMIT_BRANCH; - fi; - git clone -b $branch_name https://gitlab-ci-token:${CI_JOB_TOKEN}@${ROCPRIM_GIT_URL} $CI_PROJECT_DIR/rocPRIM - cmake -G Ninja -D CMAKE_CXX_COMPILER=hipcc -D CMAKE_BUILD_TYPE=Release -D BUILD_TEST=OFF -D BUILD_EXAMPLE=OFF -D ROCM_DEP_ROCMCORE=OFF -S $CI_PROJECT_DIR/rocPRIM -B $CI_PROJECT_DIR/rocPRIM/build - cd $CI_PROJECT_DIR/rocPRIM/build - cpack -G "DEB" - $SUDO_CMD dpkg -i rocprim*.deb .build:common: stage: build extends: - .gpus:rocm-gpus - .rules:build tags: - rocm-build script: - 
!reference [.install-rocprim, script] # Setup env vars for testing - rng_seed_count=0; prng_seeds="0"; - if [ $CI_COMMIT_BRANCH == develop_stream ] ; then rng_seed_count=3; prng_seeds="0 1000"; - fi; # Build rocThrust - cmake -G Ninja -D CMAKE_CXX_COMPILER=hipcc -D CMAKE_BUILD_TYPE=Release -D BUILD_TEST=ON -D BUILD_EXAMPLES=ON -D BUILD_BENCHMARKS=ON -D AMDGPU_TARGETS=$GPU_TARGETS -D AMDGPU_TEST_TARGETS=$GPU_TARGETS -D RNG_SEED_COUNT=$rng_seed_count -D PRNG_SEEDS=$prng_seeds -S $CI_PROJECT_DIR -B $CI_PROJECT_DIR/build - cmake --build $CI_PROJECT_DIR/build artifacts: paths: - $CI_PROJECT_DIR/build/test/* - $CI_PROJECT_DIR/build/testing/* - $CI_PROJECT_DIR/build/deps/* - $CI_PROJECT_DIR/build/CMakeCache.txt - $CI_PROJECT_DIR/build/CTestTestfile.cmake - $CI_PROJECT_DIR/build/.ninja_log exclude: - $CI_PROJECT_DIR/build/**/*.o expire_in: 2 weeks build:cmake-latest: stage: build extends: - .cmake-latest - .build:common build:cmake-minimum: stage: build extends: - .cmake-minimum - .build:common build:package: stage: build extends: - .cmake-minimum - .rules:build tags: - rocm-build script: - !reference [.install-rocprim, script] - cmake -S $CI_PROJECT_DIR -B $PACKAGE_DIR -G Ninja -D CMAKE_BUILD_TYPE=Release -D CMAKE_CXX_COMPILER=hipcc - cd $PACKAGE_DIR - cpack -G "DEB;ZIP" artifacts: paths: - $PACKAGE_DIR/rocthrust*.deb - $PACKAGE_DIR/rocthrust*.zip expire_in: 2 weeks test:package: stage: test needs: - build:package extends: - .cmake-minimum - .rules:test tags: - rocm-build script: - !reference [.install-rocprim, script] - $SUDO_CMD dpkg -i $PACKAGE_DIR/rocthrust*.deb # Test install - cmake -S $CI_PROJECT_DIR/extra -B $CI_PROJECT_DIR/package_test -G Ninja -D CMAKE_CXX_COMPILER=hipcc -D CMAKE_BUILD_TYPE=Release -D ROCPRIM_ROOT=/opt/rocm/rocprim - cmake --build $CI_PROJECT_DIR/package_test # Remove rocPRIM and rocThrust - $SUDO_CMD dpkg -r rocthrust-dev - $SUDO_CMD dpkg -r rocprim-dev test:doc: stage: test extends: - .build:docs - .rules:test test: stage: test extends: - 
.cmake-minimum - .rules:test - .gpus:rocm needs: - build:cmake-minimum script: - cd $CI_PROJECT_DIR/build - cmake -D CMAKE_PREFIX_PATH=/opt/rocm -P $CI_PROJECT_DIR/cmake/GenerateResourceSpec.cmake - cat ./resources.json # Parallel execution (with other AMDGPU processes) can oversubscribe the SDMA queue. # This causes the hipMemcpy to fail, which is not reported as an error by HIP. # As a temporary workaround, disable the SDMA for test stability. - HSA_ENABLE_SDMA=0 ctest --output-on-failure --repeat-until-fail 2 --tests-regex $GPU_TARGET --resource-spec-file ./resources.json --parallel $PARALLEL_JOBS rocThrust-rocm-5.7.1/.gitmodules000066400000000000000000000001241450263404500166340ustar00rootroot00000000000000[submodule "cub"] path = dependencies/cub url = https://github.com/NVlabs/cub.git rocThrust-rocm-5.7.1/.jenkins/000077500000000000000000000000001450263404500162015ustar00rootroot00000000000000rocThrust-rocm-5.7.1/.jenkins/common.groovy000066400000000000000000000055121450263404500207430ustar00rootroot00000000000000// This file is for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. def runCompileCommand(platform, project, jobName, boolean debug=false, boolean sameOrg=true) { project.paths.construct_build_prefix() String buildTypeArg = debug ? '-DCMAKE_BUILD_TYPE=Debug' : '-DCMAKE_BUILD_TYPE=Release' String buildTypeDir = debug ? 'debug' : 'release' String cmake = platform.jenkinsLabel.contains('centos') ? 'cmake3' : 'cmake' //Set CI node's gfx arch as target if PR, otherwise use default targets of the library String amdgpuTargets = env.BRANCH_NAME.startsWith('PR-') ? 
'-DAMDGPU_TARGETS=\$gfx_arch' : '' def getRocPRIM = auxiliary.getLibrary('rocPRIM', platform.jenkinsLabel, null, sameOrg) def command = """#!/usr/bin/env bash set -x ${getRocPRIM} cd ${project.paths.project_build_prefix} mkdir -p build/${buildTypeDir} && cd build/${buildTypeDir} ${auxiliary.gfxTargetParser()} ${cmake} -DCMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc ${buildTypeArg} ${amdgpuTargets} -DBUILD_TEST=ON -DBUILD_BENCHMARK=ON ../.. make -j\$(nproc) """ platform.runCommand(this, command) } def runTestCommand (platform, project) { String sudo = auxiliary.sudo(platform.jenkinsLabel) def testCommand = "ctest --output-on-failure" def hmmTestCommand = '' def excludeRegex = 'reduce_by_key.hip' if (platform.jenkinsLabel.contains('gfx11')) { excludeRegex = /(reduce_by_key.hip|partition.hip|sort.hip|sort_by_key.hip|stable_sort_by_key.hip|stable_sort.hip|async_copy.hip|async_reduce.hip|async_scan.hip|async_sort.hip|async_transform.hip)/ } testCommandExclude = "--exclude-regex \"${excludeRegex}\"" if (platform.jenkinsLabel.contains('gfx90a')) { hmmTestCommand = "" // temporarily disable hmm testing // """ // export HSA_XNACK=1 // export ROCTHRUST_USE_HMM=1 // ${testCommand} ${testCommandExclude} // """ } def command = """ #!/usr/bin/env bash set -x cd ${project.paths.project_build_prefix} cd ${project.testDirectory} ${testCommand} ${testCommandExclude} ${hmmTestCommand} """ platform.runCommand(this, command) } def runPackageCommand(platform, project) { def packageHelper = platform.makePackage(platform.jenkinsLabel,"${project.paths.project_build_prefix}/build/release") platform.runCommand(this, packageHelper[0]) platform.archiveArtifacts(this, packageHelper[1]) } return this rocThrust-rocm-5.7.1/.jenkins/precheckin.groovy000066400000000000000000000054361450263404500215730ustar00rootroot00000000000000#!/usr/bin/env groovy // This shared library is available at https://github.com/ROCmSoftwarePlatform/rocJENKINS/ @Library('rocJenkins@pong') _ // This file is for internal AMD use. 
// If you are interested in running your own Jenkins, please raise a github issue for assistance. import com.amd.project.* import com.amd.docker.* import java.nio.file.Path; def runCI = { nodeDetails, jobName-> def prj = new rocProject('rocThrust', 'precheckin') prj.defaults.ccache = true prj.timeout.compile = 420 // Define test architectures, optional rocm version argument is available def nodes = new dockerNodes(nodeDetails, jobName, prj) boolean formatCheck = false def commonGroovy def compileCommand = { platform, project-> commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" commonGroovy.runCompileCommand(platform, project, jobName) } def testCommand = { platform, project-> commonGroovy.runTestCommand(platform, project) } def packageCommand = { platform, project-> commonGroovy.runPackageCommand(platform, project) } buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, packageCommand) } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) def propertyList = ["compute-rocm-dkms-no-npi":[pipelineTriggers([cron('0 1 * * 0')])], "compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])], "rocm-docker":[]] propertyList = auxiliary.appendPropertyList(propertyList) Set standardJobNameSet = ["compute-rocm-dkms-no-npi", "compute-rocm-dkms-no-npi-hipclang", "rocm-docker"] def jobNameList = ["compute-rocm-dkms-no-npi":([ubuntu16:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']]), "compute-rocm-dkms-no-npi-hipclang":([ubuntu16:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']]), "rocm-docker":([ubuntu16:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']])] jobNameList = auxiliary.appendJobNameList(jobNameList) propertyList.each { jobName, property-> if (urlJobName == jobName) properties(auxiliary.addCommonProperties(property)) } Set seenJobNames = [] jobNameList.each { jobName, nodeDetails-> seenJobNames.add(jobName) if (urlJobName == jobName) runCI(nodeDetails, jobName) } // For url job 
names that are outside of the standardJobNameSet i.e. compute-rocm-dkms-no-npi-1901 if(!seenJobNames.contains(urlJobName)) { properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) runCI([ubuntu16:['gfx906']], urlJobName) } } rocThrust-rocm-5.7.1/.jenkins/staticanalysis.groovy000066400000000000000000000023441450263404500225060ustar00rootroot00000000000000#!/usr/bin/env groovy // This shared library is available at https://github.com/ROCmSoftwarePlatform/rocJENKINS/ @Library('rocJenkins@pong') _ // This is file for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. import com.amd.project.* import com.amd.docker.* import java.nio.file.Path def runCompileCommand(platform, project, jobName, boolean debug=false) { project.paths.construct_build_prefix() } def runCI = { nodeDetails, jobName-> def prj = new rocProject('rocThrust', 'StaticAnalysis') // Define test architectures, optional rocm version argument is available def nodes = new dockerNodes(nodeDetails, jobName, prj) boolean formatCheck = false boolean staticAnalysis = true def compileCommand = { platform, project-> runCompileCommand(platform, project, jobName, false) } buildProject(prj , formatCheck, nodes.dockerArray, compileCommand, null, null, staticAnalysis) } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * 6')])])) stage(urlJobName) { runCI([ubuntu20:['any']], urlJobName) } } rocThrust-rocm-5.7.1/.jenkins/staticlibrary.groovy000066400000000000000000000046651450263404500223370ustar00rootroot00000000000000#!/usr/bin/env groovy @Library('rocJenkins@pong') _ import com.amd.project.* import com.amd.docker.* import java.nio.file.Path; def runCI = { nodeDetails, jobName-> def prj = new rocProject('rocThrust', 'Static Library PreCheckin') prj.defaults.ccache = true prj.timeout.compile = 420 def nodes = new dockerNodes(nodeDetails, 
jobName, prj) def commonGroovy boolean formatCheck = false def compileCommand = { platform, project-> commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" commonGroovy.runCompileCommand(platform, project, jobName, false, true) } def testCommand = { platform, project-> commonGroovy.runTestCommand(platform, project) } def packageCommand = { platform, project-> commonGroovy.runPackageCommand(platform, project) } buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, packageCommand) } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) def propertyList = ["compute-rocm-dkms-no-npi":[pipelineTriggers([cron('0 1 * * 0')])], "compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])], "rocm-docker":[]] propertyList = auxiliary.appendPropertyList(propertyList) def jobNameList = ["compute-rocm-dkms-no-npi":([ubuntu16:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']]), "compute-rocm-dkms-no-npi-hipclang":([ubuntu16:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']]), "rocm-docker":([ubuntu16:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']])] jobNameList = auxiliary.appendJobNameList(jobNameList) propertyList.each { jobName, property-> if (urlJobName == jobName) properties(auxiliary.addCommonProperties(property)) } jobNameList.each { jobName, nodeDetails-> if (urlJobName == jobName) stage(jobName) { runCI(nodeDetails, jobName) } } // For url job names that are not listed by the jobNameList i.e. 
compute-rocm-dkms-no-npi-1901 if(!jobNameList.keySet().contains(urlJobName)) { properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) stage(urlJobName) { runCI([ubuntu16:['gfx906']], urlJobName) } } } rocThrust-rocm-5.7.1/.readthedocs.yml000066400000000000000000000004131450263404500175460ustar00rootroot00000000000000# Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details version: 2 sphinx: configuration: docs/conf.py formats: [htmlzip] python: version: 3.8 install: - requirements: docs/.sphinx/requirements.txt rocThrust-rocm-5.7.1/CHANGELOG.md000066400000000000000000000155761450263404500163110ustar00rootroot00000000000000# Change Log for rocThrust Full documentation for rocThrust is available at [https://rocthrust.readthedocs.io/en/latest/](https://rocthrust.readthedocs.io/en/latest/) ## (Unreleased) rocThrust 2.18.0 for ROCm 5.7 ### Fixed - `lower_bound`, `upper_bound`, and `binary_search` failed to compile for certain types. - Fixed issue where `transform_iterator` would not compile with `__device__`-only operators. ### Changed - Updated `docs` directory structure to match the standard of [rocm-docs-core](https://github.com/RadeonOpenCompute/rocm-docs-core). - Removed references to and workarounds for deprecated hcc ## rocThrust 2.17.0 for ROCm 5.5 ### Added - Updated to match upstream Thrust 1.17.2 ### Fixed - set_difference and set_intersection no longer hang if the number of items is above `UINT_MAX`. Previously, the unit tests for set_difference and set_intersection failed the `TestSetDifferenceWithBigIndexes`. ## rocThrust 2.16.0 for ROCm 5.3 ### Added - Updated to match upstream Thrust 1.16.0 ### Changed - rocThrust functionality dependent on device malloc works is functional as ROCm 5.2 reneabled device malloc. Device launched `thrust::sort` and `thrust::sort_by_key` are available for use. 
## rocThrust 2.15.0 for ROCm 5.2 ### Added - Packages for tests and benchmark executable on all supported OSes using CPack. ### Known issues - async_copy, partition, and stable_sort_by_key unit tests are failing on HIP on Windows. ## rocThrust 2.14.0 for ROCm 5.1 ### Added - Updated to match upstream Thrust 1.15.0 ### Known issues - async_copy, partition, and stable_sort_by_key unit tests are failing on HIP on Windows. ## rocThrust 2.13.0 for ROCm 5.0 - Updated to match upstream Thrust 1.13.0 - Updated to match upstream Thrust 1.14.0 - Added async scan ### Changed - Scan algorithms: `inclusive_scan` now uses the input-type as accumulator-type, `exclusive_scan` uses initial-value-type. - This particularly changes behaviour of small-size input types with large-size output types (e.g. `short` input, `int` output). - And low-res input with high-res output (e.g. `float` input, `double` output) ## rocThrust-2.11.2 for ROCm 4.5.0 ### Added - Initial HIP on Windows support. See README for instructions on how to build and install. ### Changed - Packaging changed to a development package (called rocthrust-dev for `.deb` packages, and rocthrust-devel for `.rpm` packages). As rocThrust is a header-only library, there is no runtime package. To aid in the transition, the development package sets the "provides" field to provide the package rocthrust, so that existing packages depending on rocthrust can continue to work. This provides feature is introduced as a deprecated feature and will be removed in a future ROCm release. ### Known issues - async_copy, partition, and stable_sort_by_key unit tests are failing on HIP on Windows. - Mixed type exclusive scan algorithm still not using the initial value type for results type. ## [rocThrust-2.11.1 for ROCm 4.4.0] ### Added - gfx1030 support - Address Sanitizer build option ### Fixed - async_transform unit test failure fixed. 
## [rocThrust-2.11.0 for ROCm 4.3.0] ### Added - Updated to match upstream Thrust 1.11 - gfx90a support added - gfx803 support re-enabled ## [rocThrust-2.10.9 for ROCm 4.2.0] ### Added - Updated to match upstream Thrust 1.10 ### Changed - Minimum cmake version required for building rocThrust is now 3.10.2 ### Fixed - Size zero inputs are now properly handled with newer ROCm builds that no longer allow zero-size kernel grid/block dimensions - Warning of unused results fixed. ## [rocThrust-2.10.8 for ROCm 4.1.0] ### Added - No new features ## [rocThrust-2.10.7 for ROCm 4.0.0] ### Added - Updated to upstream Thrust 1.10.0 - Implemented runtime error for unsupported algorithms and disabled respective tests. - Updated CMake to use downloaded rocPRIM. ## [rocThrust-2.10.6 for ROCm 3.10] ### Added - Added copy_if on device test case ### Known issues - ROCm support for device malloc has been disabled. As a result, rocThrust functionality dependent on device malloc does not work. Please avoid using device launched thrust::sort and thrust::sort_by_key. Host launched functionality is not impacted. A partial enablement of device malloc is possible by setting HIP_ENABLE_DEVICE_MALLOC to 1. Thrust::sort and thrust::sort_by_key may work on certain input sizes but is not recommended for production code. ## [rocThrust-2.10.5 for ROCm 3.9.0] ### Added - Updated to upstream Thrust 1.9.8 - New test cases for device-side algorithms ### Fixes - Bugfix for binary search - Implemented workarounds for hipStreamDefault hang ### Known issues - ROCm support for device malloc has been disabled. As a result, rocThrust functionality dependent on device malloc does not work. Please avoid using device launched thrust::sort and thrust::sort_by_key. Host launched functionality is not impacted. A partial enablement of device malloc is possible by setting HIP_ENABLE_DEVICE_MALLOC to 1. Thrust::sort and thrust::sort_by_key may work on certain input sizes but is not recommended for production code. 
## [rocThrust-2.10.4 for ROCm 3.8.0] ### Added - No new features ### Known issues - ROCm support for device malloc has been disabled. As a result, rocThrust functionality dependent on device malloc does not work. Please avoid using device launched thrust::sort and thrust::sort_by_key. Host launched functionality is not impacted. A partial enablement of device malloc is possible by setting HIP_ENABLE_DEVICE_MALLOC to 1. Thrust::sort and thrust::sort_by_key may work on certain input sizes but is not recommended for production code. ## [rocThrust-2.10.3 for ROCm 3.7.0] ### Added - Updated to upstream Thrust 1.9.4 ### Changed - Package dependecy change to rocprim only ### Known issues - ROCm support for device malloc has been disabled. As a result, rocThrust functionality dependent on device malloc does not work. Please avoid using device launched thrust::sort and thrust::sort_by_key. Host launched functionality is not impacted. A partial enablement of device malloc is possible by setting HIP_ENABLE_DEVICE_MALLOC to 1. Thrust::sort and thrust::sort_by_key may work on certain input sizes but is not recommended for production code. ## [rocThrust-2.10.2 for ROCm 3.6.0] ### Added - No new features ### Known Issues - ROCm support for device malloc has been disabled. As a result, rocThrust functionality dependent on device malloc does not work. Please avoid using device launched thrust::sort and thrust::sort_by_key. Host launched functionality is not impacted. A partial enablement of device malloc is possible by setting HIP_ENABLE_DEVICE_MALLOC to 1. Thrust::sort and thrust::sort_by_key may work on certain input sizes but is not recommended for production code. 
## [rocThrust-2.10.1 for ROCm 3.5.0] ### Added - Improved tests with fixed and random seeds for test data ### Changed - CMake searches for rocThrust locally first; downloads from github if local search fails ### Deprecated - HCC build deprecated rocThrust-rocm-5.7.1/CMakeLists.txt000066400000000000000000000155061450263404500172310ustar00rootroot00000000000000# ######################################################################## # Copyright 2019-2023 Advanced Micro Devices, Inc. # ######################################################################## cmake_minimum_required(VERSION 3.10.2 FATAL_ERROR) # Install prefix if(WIN32) set(CMAKE_INSTALL_PREFIX ${PROJECT_BINARY_DIR}/package CACHE PATH "Install path prefix, prepended onto install directories") else() set(CMAKE_INSTALL_PREFIX "/opt/rocm" CACHE PATH "Install path prefix, prepended onto install directories") endif() # Thrust project project(rocthrust LANGUAGES CXX) #Adding CMAKE_PREFIX_PATH list( APPEND CMAKE_PREFIX_PATH /opt/rocm/llvm /opt/rocm ${ROCM_PATH} ) # CMake modules list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ${ROCM_PATH}/lib/cmake/hip /opt/rocm/lib/cmake/hip # FindHIP.cmake ${HIP_PATH}/cmake /opt/rocm/hip/cmake # FindHIP.cmake ) # Set a default build type if none was specified if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) message(STATUS "Setting build type to 'Release' as none was specified.") set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build." 
FORCE) set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "" "Debug" "Release" "MinSizeRel" "RelWithDebInfo") endif() set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE CACHE BOOL "Add paths to linker search and installed rpath") # rocm-cmake contains common cmake code for rocm projects to help # setup and install include( cmake/FindROCMCmake.cmake ) include( ROCMSetupVersion ) include( ROCMCreatePackage ) include( ROCMInstallTargets ) include( ROCMPackageConfigHelpers ) include( ROCMInstallSymlinks ) include( ROCMHeaderWrapper ) include( ROCMCheckTargetIds ) include( ROCMClients ) # Detect compiler support for target ID # This section is deprecated. Please use rocm_check_target_ids for future use. if( CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" ) execute_process(COMMAND ${CMAKE_CXX_COMPILER} "--help" OUTPUT_VARIABLE CXX_OUTPUT OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_STRIP_TRAILING_WHITESPACE) string(REGEX MATCH ".mcode\-object\-version" TARGET_ID_SUPPORT ${CXX_OUTPUT}) endif() #Set the AMDGPU_TARGETS with backward compatiblity if(COMMAND rocm_check_target_ids) rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102" ) else() # Use target ID syntax if supported for AMDGPU_TARGETS if(TARGET_ID_SUPPORT) set(DEFAULT_AMDGPU_TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102") else() set(DEFAULT_AMDGPU_TARGETS "gfx803;gfx900;gfx906;gfx908") endif() endif() set(AMDGPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "List of specific machine types for library to target") # Get dependencies include(cmake/Dependencies.cmake) # Verify that supported compilers are used if (NOT WIN32) include(cmake/VerifyCompiler.cmake) endif() # Build options # Disable -Werror option(DISABLE_WERROR "Disable building with Werror" ON) option(BUILD_TEST "Build tests" OFF) option(BUILD_EXAMPLES "Build examples" OFF) 
option(BUILD_BENCHMARKS "Build benchmarks" OFF) option(DOWNLOAD_ROCPRIM "Download rocPRIM and do not search for rocPRIM package" OFF) option(BUILD_ADDRESS_SANITIZER "Build with address sanitizer enabled" OFF) option(ENABLE_UPSTREAM_TESTS "Enable upstream tests" OFF) #Set the header wrapper ON by default. option(BUILD_FILE_REORG_BACKWARD_COMPATIBILITY "Build with file/folder reorg with backward compatibility enabled" ON) set(RNG_SEED_COUNT 0 CACHE STRING "Number of true random sequences to test each input size for") set(PRNG_SEEDS 1 CACHE STRING "Seeds of pseudo random sequences to test each input size for") set(THRUST_HOST_SYSTEM_OPTIONS CPP OMP TBB) set(THRUST_HOST_SYSTEM CPP CACHE STRING "The device backend to target.") set_property( CACHE THRUST_HOST_SYSTEM PROPERTY STRINGS ${THRUST_HOST_SYSTEM_OPTIONS} ) if (NOT THRUST_HOST_SYSTEM IN_LIST THRUST_HOST_SYSTEM_OPTIONS) message( FATAL_ERROR "THRUST_HOST_SYSTEM must be one of ${THRUST_HOST_SYSTEM_OPTIONS}" ) endif () # Set CXX flags set(CMAKE_CXX_STANDARD 14) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) if(DISABLE_WERROR) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra ") else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Werror") endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14") if (WIN32) add_compile_options(-xhip) add_compile_definitions(THRUST_IGNORE_DEPRECATED_CPP_DIALECT) endif() # Address Sanitizer if(BUILD_ADDRESS_SANITIZER) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -shared-libasan") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -shared-libasan") add_link_options(-fuse-ld=lld) endif() # Setup VERSION rocm_setup_version(VERSION "2.18.0") # Print configuration summary include(cmake/Summary.cmake) print_configuration_summary() # Thrust (with HIP backend) add_subdirectory(thrust) if(BUILD_TEST OR BUILD_BENCHMARKS) rocm_package_setup_component(clients) endif() # Tests if(BUILD_TEST) rocm_package_setup_client_component(tests) if 
(ENABLE_UPSTREAM_TESTS) enable_testing() endif() # We still want the testing to be compiled to catch some errors #TODO: Get testing folder working with HIP on Windows if (NOT WIN32) add_subdirectory(testing) endif() enable_testing() add_subdirectory(test) endif() # Examples if(BUILD_EXAMPLES) add_subdirectory(examples) endif() # Benchmarks if(BUILD_BENCHMARKS) add_subdirectory(internal/benchmark) endif() #Create header wrapper for backward compatibility if(BUILD_FILE_REORG_BACKWARD_COMPATIBILITY AND NOT WIN32) rocm_wrap_header_dir( ${CMAKE_SOURCE_DIR}/thrust PATTERNS "*.h" "*.inl" "*.cuh" "*.hpp" HEADER_LOCATION include/thrust GUARDS SYMLINK WRAPPER WRAPPER_LOCATIONS rocthrust/${CMAKE_INSTALL_INCLUDEDIR}/thrust OUTPUT_LOCATIONS rocthrust/wrapper/include/thrust ) endif( ) set(THRUST_OPTIONS_DEBUG ${THRUST_OPTIONS_WARNINGS}) set(THRUST_OPTIONS_RELEASE ${THRUST_OPTIONS_WARNINGS}) # Package set(CPACK_DEBIAN_ARCHIVE_TYPE "gnutar") rocm_package_add_deb_dependencies(DEPENDS "rocprim-dev >= 2.10.1") rocm_package_add_rpm_dependencies(DEPENDS "rocprim-devel >= 2.10.1") set(CPACK_DEBIAN_PACKAGE_CONFLICTS "hip-thrust, thrust") set(CPACK_RPM_PACKAGE_CONFLICTS "hip-thrust, thrust") set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE") set(CPACK_RPM_PACKAGE_LICENSE "ASL 2.0") # if(NOT CPACK_PACKAGING_INSTALL_PREFIX) # set(CPACK_PACKAGING_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") # endif() set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "\${CPACK_PACKAGING_INSTALL_PREFIX}" ) rocm_create_package( NAME rocthrust DESCRIPTION "Radeon Open Compute Thrust library" MAINTAINER "rocthrust-maintainer@amd.com" HEADER_ONLY ) rocThrust-rocm-5.7.1/LICENSE000066400000000000000000000236761450263404500155050ustar00rootroot00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. 
"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. 
END OF TERMS AND CONDITIONS rocThrust-rocm-5.7.1/NOTICES.txt000066400000000000000000000145241450263404500163350ustar00rootroot00000000000000Notices and licenses file _________________________ AMD copyrighted code (Apache 2.0) Copyright © 2019-2022 Advanced Micro Devices, Inc. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. crascit-downloadproject v-u (MIT) # Distributed under the OSI-approved MIT License. See accompanying # file LICENSE or https://github.com/Crascit/DownloadProject for details. Dependencies on scipy-scipy v-u (MIT) Copyright (C) 2003-2013 SciPy Developers. Modifications Copyright (C) 2019 Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. Neither the name of Enthought nor the names of the SciPy Developers may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS ISâ€� AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Dependencies on thrust-thrust v1.9.2 (Apache 2.0) Copyright 2008-2013 NVIDIA Corporation Modifications Copyright (C) 2019 Advanced Micro Devices, Inc. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. rocmsoftwareplatform-rocfft v-u (MIT) Copyright © 2016 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE thrust-thrust v1.9.2 (Apache 2.0) Copyright 2008-2013 NVIDIA Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. thrust-thrust v1.9.2 (BSD3) Copyright (c) 2011, Duane Merrill. All rights reserved. Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. rocThrust-rocm-5.7.1/README.md000066400000000000000000000200011450263404500157320ustar00rootroot00000000000000# rocThrust ## Introduction Thrust is a parallel algorithm library. This library has been ported to [HIP](https://github.com/ROCm-Developer-Tools/HIP)/[ROCm](https://rocm.github.io/) platform, which uses the [rocPRIM](https://github.com/ROCmSoftwarePlatform/rocPRIM) library. The HIP ported library works on HIP/ROCm platforms. Currently there is no CUDA backend in place. 
## Requirements ### Software * CMake (3.5.1 or later) * AMD [ROCm](https://rocm.docs.amd.com) platform (1.8.0 or later) * Including [HipCC](https://github.com/ROCm-Developer-Tools/HIP) compiler, which must be set as C++ compiler on ROCm platform. * [rocPRIM](https://github.com/ROCmSoftwarePlatform/rocPRIM) library * It will be automatically downloaded and built by CMake script. * Python 3.6 or higher (HIP on Windows only, only required for install scripts) * Visual Studio 2019 with clang support (HIP on Windows only) * Strawberry Perl (HIP on Windows only) Optional: * [GTest](https://github.com/google/googletest) * Required only for tests. Building tests is enabled by default. * It will be automatically downloaded and built by CMake script. ### Hardware Visit the following link for ROCm hardware requirements: https://github.com/RadeonOpenCompute/ROCm/blob/master/README.md#supported-cpus ## Build And Install ```sh git clone https://github.com/ROCmSoftwarePlatform/rocThrust # Go to rocThrust directory, create and go to the build directory. cd rocThrust; mkdir build; cd build # Configure rocThrust, setup options for your system. # Build options: # DISABLE_WERROR - ON by default, This flag disable the -Werror compiler flag # BUILD_TEST - OFF by default, # BUILD_EXAMPLES - OFF by default, # BUILD_BENCHMARKS - OFF by default, # DOWNLOAD_ROCPRIM - OFF by default, when ON rocPRIM will be downloaded to the build folder, # RNG_SEED_COUNT - 0 by default, controls non-repeatable random dataset count # PRNG_SEEDS - 1 by default, reproducible seeds to generate random data # # ! IMPORTANT ! # On ROCm platform set C++ compiler to HipCC. You can do it by adding 'CXX=' # before 'cmake' or setting cmake option 'CMAKE_CXX_COMPILER' with the path to the HipCC compiler. # [CXX=hipcc] cmake ../. # or cmake-gui ../. # Build make -j4 # Optionally, run tests if they're enabled. 
ctest --output-on-failure # Package make package # Install [sudo] make install ``` ### HIP on Windows Initial support for HIP on Windows has been added. To install, use the provided rmake.py python script: ```shell git clone https://github.com/ROCmSoftwarePlatform/rocThrust.git cd rocThrust # the -i option will install rocPRIM to C:\hipSDK by default python rmake.py -i # the -c option will build all clients including unit tests python rmake.py -c ``` ### Macro options ``` # Performance improvement option. If you define THRUST_HIP_PRINTF_ENABLED before # thrust includes to 0, you can disable printfs on device side and improve # performance. The default value is 1 #define THRUST_HIP_PRINTF_ENABLED 0 ``` ### Using rocThrust In A Project Recommended way of including rocThrust into a CMake project is by using its package configuration files. ```cmake # On ROCm rocThrust requires rocPRIM find_package(rocprim REQUIRED CONFIG PATHS "/opt/rocm/rocprim") # "/opt/rocm" - default install prefix find_package(rocthrust REQUIRED CONFIG PATHS "/opt/rocm/rocthrust") ... includes rocThrust headers and roc::rocprim_hip target target_link_libraries( roc::rocthrust) ``` ## Running Unit Tests ```sh # Go to rocThrust build directory cd rocThrust; cd build # Configure with examples flag on CXX=hipcc cmake -DBUILD_TEST=ON .. # Build tests make -j4 # To run all tests ctest # To run unit tests for rocThrust ./test/ ``` ### Using multiple GPUs concurrently for testing This feature requires CMake 3.16+ to be used for building / testing. _(Prior versions of CMake cannot assign ids to tests when running in parallel. Assigning tests to distinct devices could only be done at the cost of extreme complexity._) The unit tests can make use of [CTest Resource Allocation](https://cmake.org/cmake/help/latest/manual/ctest.1.html#resource-allocation) feature enabling distributing tests across multiple GPUs in an intelligent manner. 
The feature can accelerate testing when multiple GPUs of the same family are in a system as well as test multiple family of products from one invocation without having to resort to `HIP_VISIBLE_DEVICES` environment variable. The feature relies on the presence of a resource spec file. > IMPORTANT: trying to use `RESOURCE_GROUPS` and `--resource-spec-file` with CMake/CTest respectively of versions prior to 3.16 omits the feature silently. No warnings issued about unknown properties or command-line arguments. Make sure that `cmake`/`ctest` invoked are sufficiently recent. #### Auto resource spec generation There is a utility script in the repo that may be called independently: ```shell # Go to rocThrust build directory cd rocThrust; cd build # Invoke directly or use CMake script mode via cmake -P ../cmake/GenerateResourceSpec.cmake # Assuming you have 2 compatible GPUs in the system ctest --resource-spec-file ./resources.json --parallel 2 ``` #### Manual Assuming the user has 2 GPUs from the gfx900 family and they are the first devices enumerated by the system one may specify during configuration `-D AMDGPU_TEST_TARGETS=gfx900` stating only one family will be tested. Leaving this var empty (default) results in targeting the default device in the system. To let CMake know there are 2 GPUs that should be targeted, one has to feed CTest a JSON file via the `--resource-spec-file ` flag. For example: ```json { "version": { "major": 1, "minor": 0 }, "local": [ { "gfx900": [ { "id": "0" }, { "id": "1" } ] } ] } ``` ## Using custom seeds for the tests There are 2 CMake configuration-time options that control random data fed to unit tests. - `RNG_SEED_COUNT`, (0 by default) controls non-repeatable random dataset count. It draws values from a default constructed `std::random_device`. Should tests fail, the actual seed producing the failure are reported by Gtest, enabling reproducibility. - `PRNG_SEEDS`, (1 by default) controls repeatable dataset seeds. 
It is a CMake formatted (semi-colon delimited) array of 32-bit unsigned integrals. - _(Note: semi-colons often collide with shell command parsing. It is advised to escape the entire CMake CLI argument to avoid the variable itself picking up quotation marks. Pass `cmake "-DPRNG_SEEDS=1;2;3;4"` instead of `cmake -DPRNG_SEEDS="1;2;3;4"`, the two cases differ in how the CMake executable receives its arguments from the OS.)_ ## Running Examples ```sh # Go to rocThrust build directory cd rocThrust; cd build # Configure with examples flag on CXX=hipcc cmake -DBUILD_EXAMPLES=ON .. # Build examples make -j4 # Run the example you want to run # ./examples/example_thrust_ # For example: ./examples/example_thrust_version # Example for linking with cpp files ./examples/cpp_integration/example_thrust_linking ``` ## Running Benchmarks ```sh # Go to rocThrust build directory cd rocThrust; cd build # Configure with benchmarks flag on CXX=hipcc cmake -DBUILD_BENCHMARKS=ON .. # Build benchmarks make -j4 # Run the benchmarks ./benchmarks/benchmark_thrust_bench ``` ## Documentation Documentation is available [here](https://rocthrust.readthedocs.io/en/latest/). It can also be build using the following commands: ```shell # Go to rocThrust docs directory cd rocThrust; cd docs # Install Python dependencies python3 -m pip install -r .sphinx/requirements.txt # Build the documentation python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en . _build/html # For e.g. serve the HTML docs locally cd _build/html python3 -m http.server ``` ## Support Bugs and feature requests can be reported through [the issue tracker](https://github.com/ROCmSoftwarePlatform/rocThrust/issues). ## License rocThrust is distributed under the [Apache 2.0 LICENSE](./LICENSE). 
rocThrust-rocm-5.7.1/cmake/000077500000000000000000000000001450263404500155425ustar00rootroot00000000000000rocThrust-rocm-5.7.1/cmake/Dependencies.cmake000066400000000000000000000046321450263404500211370ustar00rootroot00000000000000# ######################################################################## # Copyright 2019-2023 Advanced Micro Devices, Inc. # ######################################################################## # ########################### # rocThrust dependencies # ########################### # HIP dependency is handled earlier in the project cmake file # when VerifyCompiler.cmake is included. # For downloading, building, and installing required dependencies include(cmake/DownloadProject.cmake) # rocPRIM (https://github.com/ROCmSoftwarePlatform/rocPRIM) if(NOT DOWNLOAD_ROCPRIM) find_package(rocprim QUIET) endif() if(NOT rocprim_FOUND) message(STATUS "Downloading and building rocprim.") download_project( PROJ rocprim GIT_REPOSITORY https://github.com/ROCmSoftwarePlatform/rocPRIM.git GIT_TAG develop INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/deps/rocprim CMAKE_ARGS -DBUILD_TEST=OFF -DCMAKE_INSTALL_PREFIX= -DCMAKE_PREFIX_PATH=/opt/rocm LOG_DOWNLOAD TRUE LOG_CONFIGURE TRUE LOG_BUILD TRUE LOG_INSTALL TRUE BUILD_PROJECT TRUE UPDATE_DISCONNECTED TRUE # Never update automatically from the remote repository ) find_package(rocprim REQUIRED CONFIG PATHS ${CMAKE_CURRENT_BINARY_DIR}/deps/rocprim NO_DEFAULT_PATH) endif() # Test dependencies if(BUILD_TEST) if(NOT DEPENDENCIES_FORCE_DOWNLOAD) # Google Test (https://github.com/google/googletest) find_package(GTest QUIET) else() message(STATUS "Force installing GTest.") endif() if(NOT TARGET GTest::GTest AND NOT TARGET GTest::gtest) message(STATUS "GTest not found or force download GTest on. 
Downloading and building GTest.") set(GTEST_ROOT ${CMAKE_CURRENT_BINARY_DIR}/deps/gtest CACHE PATH "") download_project( PROJ googletest GIT_REPOSITORY https://github.com/google/googletest.git GIT_TAG release-1.11.0 INSTALL_DIR ${GTEST_ROOT} CMAKE_ARGS -DBUILD_GTEST=ON -DINSTALL_GTEST=ON -Dgtest_force_shared_crt=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_INSTALL_PREFIX= LOG_DOWNLOAD TRUE LOG_CONFIGURE TRUE LOG_BUILD TRUE LOG_INSTALL TRUE BUILD_PROJECT TRUE UPDATE_DISCONNECTED TRUE ) find_package(GTest REQUIRED CONFIG PATHS ${GTEST_ROOT}) endif() endif() rocThrust-rocm-5.7.1/cmake/DownloadProject.CMakeLists.cmake.in000066400000000000000000000020011450263404500242360ustar00rootroot00000000000000# Distributed under the OSI-approved MIT License. See accompanying # file LICENSE or https://github.com/Crascit/DownloadProject for details. cmake_minimum_required(VERSION 2.8.2) project(${DL_ARGS_PROJ}-download NONE) include(ExternalProject) if(${DL_ARGS_BUILD_PROJECT}) ExternalProject_Add(${DL_ARGS_PROJ}-download ${DL_ARGS_UNPARSED_ARGUMENTS} SOURCE_DIR "${DL_ARGS_SOURCE_DIR}" BUILD_IN_SOURCE TRUE TEST_COMMAND "" ) else() ExternalProject_Add(${DL_ARGS_PROJ}-download ${DL_ARGS_UNPARSED_ARGUMENTS} SOURCE_DIR "${DL_ARGS_SOURCE_DIR}" BUILD_IN_SOURCE TRUE TEST_COMMAND "" UPDATE_COMMAND "" CONFIGURE_COMMAND "" BUILD_COMMAND "" INSTALL_COMMAND "" ) endif() rocThrust-rocm-5.7.1/cmake/DownloadProject.cmake000066400000000000000000000167451450263404500216570ustar00rootroot00000000000000# Distributed under the OSI-approved MIT License. See accompanying # file LICENSE or https://github.com/Crascit/DownloadProject for details. # # MODULE: DownloadProject # # PROVIDES: # download_project( PROJ projectName # [PREFIX prefixDir] # [DOWNLOAD_DIR downloadDir] # [SOURCE_DIR srcDir] # [BINARY_DIR binDir] # [QUIET] # ... # ) # # Provides the ability to download and unpack a tarball, zip file, git repository, # etc. at configure time (i.e. when the cmake command is run). 
How the downloaded # and unpacked contents are used is up to the caller, but the motivating case is # to download source code which can then be included directly in the build with # add_subdirectory() after the call to download_project(). Source and build # directories are set up with this in mind. # # The PROJ argument is required. The projectName value will be used to construct # the following variables upon exit (obviously replace projectName with its actual # value): # # projectName_SOURCE_DIR # projectName_BINARY_DIR # # The SOURCE_DIR and BINARY_DIR arguments are optional and would not typically # need to be provided. They can be specified if you want the downloaded source # and build directories to be located in a specific place. The contents of # projectName_SOURCE_DIR and projectName_BINARY_DIR will be populated with the # locations used whether you provide SOURCE_DIR/BINARY_DIR or not. # # The DOWNLOAD_DIR argument does not normally need to be set. It controls the # location of the temporary CMake build used to perform the download. # # The PREFIX argument can be provided to change the base location of the default # values of DOWNLOAD_DIR, SOURCE_DIR and BINARY_DIR. If all of those three arguments # are provided, then PREFIX will have no effect. The default value for PREFIX is # CMAKE_BINARY_DIR. # # The QUIET option can be given if you do not want to show the output associated # with downloading the specified project. # # In addition to the above, any other options are passed through unmodified to # ExternalProject_Add() to perform the actual download, patch and update steps. # # Only those ExternalProject_Add() arguments which relate to downloading, patching # and updating of the project sources are intended to be used. Also note that at # least one set of download-related arguments are required. 
# # If using CMake 3.2 or later, the UPDATE_DISCONNECTED option can be used to # prevent a check at the remote end for changes every time CMake is run # after the first successful download. See the documentation of the ExternalProject # module for more information. It is likely you will want to use this option if it # is available to you. Note, however, that the ExternalProject implementation contains # bugs which result in incorrect handling of the UPDATE_DISCONNECTED option when # using the URL download method or when specifying a SOURCE_DIR with no download # method. Fixes for these have been created, the last of which is scheduled for # inclusion in CMake 3.8.0. Details can be found here: # # https://gitlab.kitware.com/cmake/cmake/commit/bdca68388bd57f8302d3c1d83d691034b7ffa70c # https://gitlab.kitware.com/cmake/cmake/issues/16428 # # If you experience build errors related to the update step, consider avoiding # the use of UPDATE_DISCONNECTED. # # EXAMPLE USAGE: # # include(DownloadProject) # download_project(PROJ googletest # GIT_REPOSITORY https://github.com/google/googletest.git # GIT_TAG master # UPDATE_DISCONNECTED 1 # QUIET # ) # # add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR}) # #======================================================================================== set(_DownloadProjectDir "${CMAKE_CURRENT_LIST_DIR}") include(CMakeParseArguments) function(download_project) set(options QUIET) set(oneValueArgs PROJ PREFIX DOWNLOAD_DIR SOURCE_DIR BINARY_DIR BUILD_PROJECT ) set(multiValueArgs "") cmake_parse_arguments(DL_ARGS "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) # Hide output if requested if (DL_ARGS_QUIET) set(OUTPUT_QUIET "OUTPUT_QUIET") else() unset(OUTPUT_QUIET) message(STATUS "Downloading/updating ${DL_ARGS_PROJ}") endif() # Set up where we will put our temporary CMakeLists.txt file and also # the base point below which the default source and binary dirs will be. # The prefix must always be an absolute path. 
if (NOT DL_ARGS_PREFIX) set(DL_ARGS_PREFIX "${CMAKE_BINARY_DIR}") else() get_filename_component(DL_ARGS_PREFIX "${DL_ARGS_PREFIX}" ABSOLUTE BASE_DIR "${CMAKE_CURRENT_BINARY_DIR}") endif() if (NOT DL_ARGS_DOWNLOAD_DIR) set(DL_ARGS_DOWNLOAD_DIR "${DL_ARGS_PREFIX}/${DL_ARGS_PROJ}-download") endif() # Ensure the caller can know where to find the source and build directories if (NOT DL_ARGS_SOURCE_DIR) set(DL_ARGS_SOURCE_DIR "${DL_ARGS_PREFIX}/${DL_ARGS_PROJ}-src") endif() if (NOT DL_ARGS_BINARY_DIR) set(DL_ARGS_BINARY_DIR "${DL_ARGS_PREFIX}/${DL_ARGS_PROJ}-build") endif() set(${DL_ARGS_PROJ}_SOURCE_DIR "${DL_ARGS_SOURCE_DIR}" PARENT_SCOPE) set(${DL_ARGS_PROJ}_BINARY_DIR "${DL_ARGS_BINARY_DIR}" PARENT_SCOPE) # The way that CLion manages multiple configurations, it causes a copy of # the CMakeCache.txt to be copied across due to it not expecting there to # be a project within a project. This causes the hard-coded paths in the # cache to be copied and builds to fail. To mitigate this, we simply # remove the cache if it exists before we configure the new project. It # is safe to do so because it will be re-generated. Since this is only # executed at the configure step, it should not cause additional builds or # downloads. file(REMOVE "${DL_ARGS_DOWNLOAD_DIR}/CMakeCache.txt") # Create and build a separate CMake project to carry out the download. # If we've already previously done these steps, they will not cause # anything to be updated, so extra rebuilds of the project won't occur. # Make sure to pass through CMAKE_MAKE_PROGRAM in case the main project # has this set to something not findable on the PATH. configure_file("${_DownloadProjectDir}/DownloadProject.CMakeLists.cmake.in" "${DL_ARGS_DOWNLOAD_DIR}/CMakeLists.txt") execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" -D "CMAKE_MAKE_PROGRAM:FILE=${CMAKE_MAKE_PROGRAM}" . 
RESULT_VARIABLE result ${OUTPUT_QUIET} WORKING_DIRECTORY "${DL_ARGS_DOWNLOAD_DIR}" ) if(result) message(FATAL_ERROR "CMake step for ${DL_ARGS_PROJ} failed: ${result}") endif() execute_process(COMMAND ${CMAKE_COMMAND} --build . RESULT_VARIABLE result ${OUTPUT_QUIET} WORKING_DIRECTORY "${DL_ARGS_DOWNLOAD_DIR}" ) if(result) message(FATAL_ERROR "Build step for ${DL_ARGS_PROJ} failed: ${result}") endif() endfunction() rocThrust-rocm-5.7.1/cmake/FindROCMCmake.cmake000066400000000000000000000036271450263404500210560ustar00rootroot00000000000000# ######################################################################## # Copyright 2021 Advanced Micro Devices, Inc. # ######################################################################## # ########################### # rocThrust dependencies # ########################### # For downloading, building, and installing required dependencies include(cmake/DownloadProject.cmake) set(PROJECT_EXTERN_DIR ${CMAKE_CURRENT_BINARY_DIR}/extern) # By default, rocm software stack is expected at /opt/rocm # set environment variable ROCM_PATH to change location if(NOT ROCM_PATH) set(ROCM_PATH /opt/rocm) endif() find_package(ROCM 0.7.3 CONFIG QUIET PATHS ${ROCM_PATH} /opt/rocm) if(NOT ROCM_FOUND) set(rocm_cmake_tag "master" CACHE STRING "rocm-cmake tag to download") set(rocm_cmake_url "https://github.com/RadeonOpenCompute/rocm-cmake/archive/${rocm_cmake_tag}.zip") set(rocm_cmake_path "${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag}") set(rocm_cmake_archive "${rocm_cmake_path}.zip") file(DOWNLOAD "${rocm_cmake_url}" "${rocm_cmake_archive}" STATUS status LOG log) list(GET status 0 status_code) list(GET status 1 status_string) if(status_code EQUAL 0) message(STATUS "downloading... 
done") else() message(FATAL_ERROR "error: downloading\n'${rocm_cmake_url}' failed status_code: ${status_code} status_string: ${status_string} log: ${log}\n") endif() execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzvf "${rocm_cmake_archive}" WORKING_DIRECTORY ${PROJECT_EXTERN_DIR}) execute_process( COMMAND ${CMAKE_COMMAND} -DCMAKE_INSTALL_PREFIX=${PROJECT_EXTERN_DIR}/rocm-cmake . WORKING_DIRECTORY ${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag} ) execute_process( COMMAND ${CMAKE_COMMAND} --build rocm-cmake-${rocm_cmake_tag} --target install WORKING_DIRECTORY ${PROJECT_EXTERN_DIR}) find_package( ROCM 0.7.3 REQUIRED CONFIG PATHS ${PROJECT_EXTERN_DIR}/rocm-cmake ) endif() rocThrust-rocm-5.7.1/cmake/GenerateResourceSpec.cmake000077500000000000000000000065771450263404500226430ustar00rootroot00000000000000
#!/usr/bin/cmake -P

# Runs `rocminfo`, collects the gfx IP of every reported device, and writes
# ${CMAKE_CURRENT_BINARY_DIR}/resources.json, in which the device ids are
# grouped by gfx IP.
# NOTE(review): the JSON layout (version 1.0 + "local" array) looks like a
# CTest resource specification file -- confirm against the consumer.

find_program(ROCMINFO_EXECUTABLE
  rocminfo
)

if(NOT ROCMINFO_EXECUTABLE)
  message(FATAL_ERROR "rocminfo not found")
endif()

execute_process(
  COMMAND ${ROCMINFO_EXECUTABLE}
  RESULT_VARIABLE ROCMINFO_EXIT_CODE
  OUTPUT_VARIABLE ROCMINFO_STDOUT
  ERROR_VARIABLE ROCMINFO_STDERR
)

if(ROCMINFO_EXIT_CODE)
  message(SEND_ERROR "rocminfo exited with ${ROCMINFO_EXIT_CODE}")
  # Quoted so that an empty or ';'-containing stderr is still reported intact.
  message(FATAL_ERROR "${ROCMINFO_STDERR}")
endif()

# ISA names may end in hexadecimal letters (e.g. gfx90a), so accept [0-9a-f]
# rather than digits only; otherwise such names are silently truncated.
string(REGEX MATCHALL [[--(gfx[0-9a-f]+)]]
  ROCMINFO_MATCHES
  "${ROCMINFO_STDOUT}"
)

# NOTE: Unfortunately we don't have structs in CMake,
#       neither do we have std::partition only list(SORT)
#
# Transform raw regex matches to pairs of gfx IP and device id
# This will be our struct emulation. In C++ it would be
#
# struct device
# {
#   std::string ip;
#   int id;
# };
#
# std::vector GFXIP_AND_ID{ {"gfx900",0},{"gfx803",1},{"gfx900",2} };
# std::sort(GFXIP_AND_ID.begin(), GFXIP_AND_ID.end(),
#   [](const device& lhs, const device& rhs)
#   {
#     return std::lexicographical_compare(lhs.ip.begin(), lhs.ip.end(),
#                                         rhs.ip.begin(), rhs.ip.end());
#   });
#
set(GFXIP_AND_ID)
set(ID 0)
foreach(ROCMINFO_MATCH IN LISTS ROCMINFO_MATCHES)
  string(REGEX REPLACE "--" "" ROCMINFO_MATCH ${ROCMINFO_MATCH})
  list(APPEND GFXIP_AND_ID "${ROCMINFO_MATCH}:${ID}")
  math(EXPR ID "${ID} + 1")
endforeach()
list(SORT GFXIP_AND_ID)

# Now comes the tricky part: implementing the following C++ logic
#
# std::stringstream JSON_PAYLOAD;
# auto it = GFXIP_AND_ID.begin();
# while (it != GFXIP_AND_ID.end())
# {
#   auto IT = std::find_if(it, GFXIP_AND_ID.end(),
#     [=](const device& ip_id){ return ip_id.ip.compare(it->ip) != 0; });
#   JSON_PAYLOAD << "\n \"" << it->ip << "\": [";
#   std::for_each(it, IT, [&](const device& ip_id)
#   {
#     JSON_PAYLOAD <<
#       "\n {\n" <<
#       " \"id\": \"" << ip_id.id << "\"\n" <<
#       " },";
#   });
#   JSON_PAYLOAD.seekp(-1, std::ios_base::end); // discard trailing comma
#   JSON_PAYLOAD << "\n ],";
#   it = IT;
# }
# JSON_PAYLOAD.seekp(-1, std::ios_base::end); // discard trailing comma
#
list(LENGTH GFXIP_AND_ID COUNT)
if(COUNT EQUAL 0)
  # Previously this case died inside list(GET) on an empty list.
  message(WARNING "rocminfo reported no gfx devices; resources.json will list no resources")
endif()

set(JSON_PAYLOAD "")
set(IT1 0)
while(IT1 LESS COUNT)
  # `it->ip`: the key of the group starting at IT1. It has to be re-read on
  # every pass; computing it only once before the loop means the second
  # distinct gfx IP never matches, IT1 stops advancing and the loop never ends.
  list(GET GFXIP_AND_ID ${IT1} I1)
  string(REGEX REPLACE [[:[0-9]+$]] "" IP1 "${I1}")
  string(APPEND JSON_PAYLOAD "\n \"${IP1}\": [")

  # Walk forward over every entry that shares this gfx IP.
  set(IT2 ${IT1})
  set(I2 "${I1}")
  set(IP2 "${IP1}")
  while(IT2 LESS COUNT AND IP2 STREQUAL IP1)
    string(REGEX REPLACE [[^gfx[0-9a-f]+:]] "" ID2 "${I2}")
    string(APPEND JSON_PAYLOAD
      "\n {\n"
      " \"id\": \"${ID2}\"\n"
      " },"
    )
    math(EXPR IT2 "${IT2} + 1")
    if(IT2 LESS COUNT)
      list(GET GFXIP_AND_ID ${IT2} I2)
      string(REGEX REPLACE [[:[0-9]+$]] "" IP2 "${I2}")
    endif()
  endwhile()

  # Discard the trailing comma of the last device entry, then close the group.
  string(REGEX REPLACE [[,$]] "" JSON_PAYLOAD "${JSON_PAYLOAD}")
  string(APPEND JSON_PAYLOAD "\n ],")
  set(IT1 ${IT2})
endwhile()
# Discard the trailing comma of the last group. The input is quoted so that an
# empty payload is a valid (empty) argument instead of a missing one.
string(REGEX REPLACE [[,$]] "" JSON_PAYLOAD "${JSON_PAYLOAD}")

set(JSON_HEAD [[{ "version": { "major": 1, "minor": 0 }, "local": [ {]]
)
set(JSON_TAIL [[ } ] }]]
)

file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/resources.json
  ${JSON_HEAD}
  ${JSON_PAYLOAD}
  ${JSON_TAIL}
)
rocThrust-rocm-5.7.1/cmake/ROCMExportTargetsHeaderOnly.cmake000066400000000000000000000133241450263404500240160ustar00rootroot00000000000000# MIT License # # Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. # This file is a workaround for issues with rocm-cmake packaging style and PyTorch. # TODO: remove when there is a fix for this issue in either rocm-cmake or PyTorch.
include(CMakeParseArguments)
include(GNUInstallDirs)
include(ROCMPackageConfigHelpers)
include(ROCMInstallTargets)

set(ROCM_INSTALL_LIBDIR lib)

# Appends a guarded call to the package-config template FILENAME:
#
#   if(NOT (DEFINED <CHECK_VARIABLE> AND <CHECK_VARIABLE>) )
#       <NAME>(<remaining arguments, space separated>)
#   endif()
#
# i.e. <NAME>(...) only runs in the consuming project when CHECK_VARIABLE is
# undefined or false.
#
# The line breaks are written as explicit "\n" escapes: CMake requires every
# command invocation to end with a newline, so the three commands must never
# end up on one physical line of the generated file.
function(rocm_write_package_template_function_if FILENAME NAME CHECK_VARIABLE)
    string(REPLACE ";" " " ARGS "${ARGN}")
    file(APPEND "${FILENAME}"
        "\nif(NOT (DEFINED ${CHECK_VARIABLE} AND ${CHECK_VARIABLE}) )\n"
        "    ${NAME}(${ARGS})\n"
        "endif()\n")
endfunction()

# Generates and installs the <package>-config.cmake / <package>-config-version.cmake
# files plus the exported targets file for a header-only package.
#
# One-value arguments:
#   NAMESPACE      forwarded as NAMESPACE to rocm_install(EXPORT ...)
#   EXPORT         name of the export set / targets file
#                  (default: <lowercase package name>-targets)
#   NAME           package name (default: ${PROJECT_NAME})
#   COMPATIBILITY  version compatibility mode (default: SameMajorVersion)
#   PREFIX         directory prepended to the bin/lib/include install dirs
# Multi-value arguments:
#   TARGETS        targets published as <NAME>_LIBRARIES / <NAME>_LIBRARY,
#                  unless PYTORCH_FOUND_HIP is set in the consuming project
#   DEPENDS        dependency specifications written as find_dependency() calls
#   INCLUDE        extra CMake files installed next to, and included from, the config
#
# NOTE(review): rocm_write_package_template_function, rocm_list_split,
# rocm_install and rocm_configure_package_config_file are not defined in this
# file; presumably they come from the rocm-cmake modules included above.
function(rocm_export_targets_header_only)
    set(options)
    set(oneValueArgs NAMESPACE EXPORT NAME COMPATIBILITY PREFIX)
    set(multiValueArgs TARGETS DEPENDS INCLUDE)

    cmake_parse_arguments(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

    set(PACKAGE_NAME ${PROJECT_NAME})
    if(PARSE_NAME)
        set(PACKAGE_NAME ${PARSE_NAME})
    endif()

    string(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UPPER)
    string(TOLOWER ${PACKAGE_NAME} PACKAGE_NAME_LOWER)

    set(TARGET_FILE ${PACKAGE_NAME_LOWER}-targets)
    if(PARSE_EXPORT)
        set(TARGET_FILE ${PARSE_EXPORT})
    endif()
    set(CONFIG_NAME ${PACKAGE_NAME_LOWER}-config)
    set(TARGET_VERSION ${PROJECT_VERSION})

    if(PARSE_PREFIX)
        set(PREFIX_DIR ${PARSE_PREFIX})
        set(PREFIX_ARG PREFIX ${PREFIX_DIR})
        set(BIN_INSTALL_DIR ${PREFIX_DIR}/${CMAKE_INSTALL_BINDIR})
        set(LIB_INSTALL_DIR ${PREFIX_DIR}/${ROCM_INSTALL_LIBDIR})
        set(INCLUDE_INSTALL_DIR ${PREFIX_DIR}/${CMAKE_INSTALL_INCLUDEDIR})
    else()
        set(BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR})
        set(LIB_INSTALL_DIR ${ROCM_INSTALL_LIBDIR})
        set(INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR})
    endif()
    set(CONFIG_PACKAGE_INSTALL_DIR ${LIB_INSTALL_DIR}/cmake/${PACKAGE_NAME_LOWER})

    set(CONFIG_TEMPLATE "${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME_LOWER}-config.cmake.in")

    # Start a fresh template; everything below appends to it.
    file(WRITE "${CONFIG_TEMPLATE}" "\n@PACKAGE_INIT@\n")

    # Publish the include directory under the as-given, upper-case and
    # lower-case spellings of the package name.
    foreach(NAME ${PACKAGE_NAME} ${PACKAGE_NAME_UPPER} ${PACKAGE_NAME_LOWER})
        rocm_write_package_template_function(${CONFIG_TEMPLATE} set_and_check ${NAME}_INCLUDE_DIR "@PACKAGE_INCLUDE_INSTALL_DIR@")
        rocm_write_package_template_function(${CONFIG_TEMPLATE} set_and_check ${NAME}_INCLUDE_DIRS "@PACKAGE_INCLUDE_INSTALL_DIR@")
    endforeach()

    rocm_write_package_template_function(${CONFIG_TEMPLATE} set_and_check ${PACKAGE_NAME}_TARGET_FILE "@PACKAGE_CONFIG_PACKAGE_INSTALL_DIR@/${TARGET_FILE}.cmake")

    if(PARSE_DEPENDS)
        rocm_list_split(PARSE_DEPENDS PACKAGE DEPENDS_LIST)
        foreach(DEPEND ${DEPENDS_LIST})
            rocm_write_package_template_function(${CONFIG_TEMPLATE} find_dependency ${${DEPEND}})
        endforeach()
    endif()

    foreach(INCLUDE ${PARSE_INCLUDE})
        rocm_install(FILES ${INCLUDE} DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR})
        get_filename_component(INCLUDE_BASE ${INCLUDE} NAME)
        rocm_write_package_template_function(${CONFIG_TEMPLATE} include "\${CMAKE_CURRENT_LIST_DIR}/${INCLUDE_BASE}")
    endforeach()

    if(PARSE_TARGETS)
        rocm_write_package_template_function(${CONFIG_TEMPLATE} include "\${${PACKAGE_NAME}_TARGET_FILE}")
        # The *_LIBRARIES / *_LIBRARY variables are only set when
        # PYTORCH_FOUND_HIP is undefined or false in the consuming project.
        foreach(NAME ${PACKAGE_NAME} ${PACKAGE_NAME_UPPER} ${PACKAGE_NAME_LOWER})
            rocm_write_package_template_function_if(${CONFIG_TEMPLATE} set PYTORCH_FOUND_HIP ${NAME}_LIBRARIES ${PARSE_TARGETS})
            rocm_write_package_template_function_if(${CONFIG_TEMPLATE} set PYTORCH_FOUND_HIP ${NAME}_LIBRARY ${PARSE_TARGETS})
        endforeach()
    endif()

    rocm_configure_package_config_file(
        ${CONFIG_TEMPLATE}
        ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}.cmake
        INSTALL_DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR}
        ${PREFIX_ARG}
        PATH_VARS LIB_INSTALL_DIR INCLUDE_INSTALL_DIR CONFIG_PACKAGE_INSTALL_DIR
    )

    set(COMPATIBILITY_ARG SameMajorVersion)
    if(PARSE_COMPATIBILITY)
        set(COMPATIBILITY_ARG ${PARSE_COMPATIBILITY})
    endif()
    write_basic_package_version_file(
        ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}-version.cmake
        VERSION ${TARGET_VERSION}
        COMPATIBILITY ${COMPATIBILITY_ARG}
    )

    set(NAMESPACE_ARG)
    if(PARSE_NAMESPACE)
        set(NAMESPACE_ARG "NAMESPACE;${PARSE_NAMESPACE}")
    endif()
    rocm_install(
        EXPORT ${TARGET_FILE}
        DESTINATION
        ${CONFIG_PACKAGE_INSTALL_DIR}
        ${NAMESPACE_ARG}
    )

    rocm_install(
        FILES
        ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}.cmake
        ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}-version.cmake
        DESTINATION
        ${CONFIG_PACKAGE_INSTALL_DIR})
endfunction()
rocThrust-rocm-5.7.1/cmake/Summary.cmake000066400000000000000000000045641450263404500202120ustar00rootroot00000000000000# MIT License # # Copyright (c) 2019-2023 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
function (print_configuration_summary) message(STATUS "") message(STATUS "******** Summary ********") message(STATUS "General:") message(STATUS " System : ${CMAKE_SYSTEM_NAME}") message(STATUS " HIP ROOT : ${HIP_ROOT_DIR}") message(STATUS " C++ compiler : ${CMAKE_CXX_COMPILER}") message(STATUS " C++ compiler version : ${CMAKE_CXX_COMPILER_VERSION}") string(STRIP "${CMAKE_CXX_FLAGS}" CMAKE_CXX_FLAGS_STRIP) message(STATUS " CXX flags : ${CMAKE_CXX_FLAGS_STRIP}") message(STATUS " Build type : ${CMAKE_BUILD_TYPE}") message(STATUS " Install prefix : ${CMAKE_INSTALL_PREFIX}") if(HIP_COMPILER STREQUAL "clang") message(STATUS " Device targets : ${AMDGPU_TARGETS}") endif() message(STATUS "") message(STATUS " DISABLE_WERROR : ${DISABLE_WERROR}") message(STATUS " DOWNLOAD_ROCPRIM : ${DOWNLOAD_ROCPRIM}") message(STATUS " BUILD_TEST : ${BUILD_TEST}") message(STATUS " BUILD_EXAMPLES : ${BUILD_EXAMPLES}") message(STATUS " BUILD_BENCHMARKS : ${BUILD_BENCHMARKS}") message(STATUS " BUILD_ADDRESS_SANITIZER : ${BUILD_ADDRESS_SANITIZER}") endfunction() rocThrust-rocm-5.7.1/cmake/VerifyCompiler.cmake000066400000000000000000000032511450263404500215040ustar00rootroot00000000000000# MIT License # # Copyright (c) 2019-2023 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. 
# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. list(APPEND CMAKE_PREFIX_PATH /opt/rocm /opt/rocm/hip) find_package(hip REQUIRED CONFIG PATHS /opt/rocm) if(HIP_COMPILER STREQUAL "nvcc") message(FATAL_ERROR "rocThrust does not support the CUDA backend.") elseif(HIP_COMPILER STREQUAL "clang") if(NOT (CMAKE_CXX_COMPILER MATCHES ".*hipcc$" OR CMAKE_CXX_COMPILER MATCHES ".*clang\\+\\+")) message(FATAL_ERROR "On ROCm platform 'hipcc' or HIP-aware Clang must be used as C++ compiler.") endif() else() message(FATAL_ERROR "HIP_COMPILER must be `clang` (AMD ROCm platform)") endif() rocThrust-rocm-5.7.1/cub000077700000000000000000000000001450263404500210442dependencies/cub/cubustar00rootroot00000000000000rocThrust-rocm-5.7.1/custom.properties000066400000000000000000000001401450263404500201050ustar00rootroot00000000000000booktitle=rocTHRUST API Guide spreadsheet.xml=docs/classification-map.xml document.locale=enusrocThrust-rocm-5.7.1/dependencies/000077500000000000000000000000001450263404500171105ustar00rootroot00000000000000rocThrust-rocm-5.7.1/dependencies/cub/000077500000000000000000000000001450263404500176615ustar00rootroot00000000000000rocThrust-rocm-5.7.1/doc/000077500000000000000000000000001450263404500152275ustar00rootroot00000000000000rocThrust-rocm-5.7.1/doc/CHANGELOG.md000066400000000000000000002673761450263404500170650ustar00rootroot00000000000000# Changelog ## Thrust 1.17.2 ### Summary Thrust 1.17.2 is a minor bugfix release that provides an updated version of CUB. 
## Thrust 1.17.1 ### Summary Thrust 1.17.1 is a minor bugfix release that provides an updated version of CUB. ## Thrust 1.17.0 ### Summary Thrust 1.17.0 is the final minor release of the 1.X series. This release provides GDB pretty-printers for device vectors/references, a new `unique_count` algorithm, and an easier way to create tagged Thrust iterators. Several documentation fixes are included, which can be found on the new Thrust documentation site at https://nvidia.github.io/thrust. We'll be migrating existing documentation sources to this new location over the next few months. ### New Features - NVIDIA/thrust#1586: Add new `thrust::make_tagged_iterator` convenience function. Thanks to @karthikeyann for this contribution. - NVIDIA/thrust#1619: Add `unique_count` algorithm. Thanks to @upsj for this contribution. - NVIDIA/thrust#1631: Add GDB pretty-printers for device vectors/references to `scripts/gdb-pretty-printers.py`. Thanks to @upsj for this contribution. ### Bug Fixes - NVIDIA/thrust#1671: Fixed `reduce_by_key` when called with 2^31 elements. ### Other Enhancements - NVIDIA/thrust#1512: Use CUB to implement `adjacent_difference`. - NVIDIA/thrust#1555: Use CUB to implement `scan_by_key`. - NVIDIA/thrust#1611: Add new doxybook-based Thrust documentation at https://nvidia.github.io/thrust. - NVIDIA/thrust#1639: Fixed broken link in documentation. Thanks to @jrhemstad for this contribution. - NVIDIA/thrust#1644: Increase contrast of search input text in new doc site. Thanks to @bdice for this contribution. - NVIDIA/thrust#1647: Add `__forceinline__` annotations to a functor wrapper. Thanks to @mkuron for this contribution. - NVIDIA/thrust#1660: Fixed typo in documentation example for `permutation_iterator`. - NVIDIA/thrust#1669: Add a new `explicit_cuda_stream.cu` example that shows how to use explicit CUDA streams and `par`/`par_nosync` execution policies. 
## Thrust 1.16.0 ### Summary Thrust 1.16.0 provides a new “nosync” hint for the CUDA backend, as well as numerous bugfixes and stability improvements. #### New `thrust::cuda::par_nosync` Execution Policy Most of Thrust’s parallel algorithms are fully synchronous and will block the calling CPU thread until all work is completed. This design avoids many pitfalls associated with asynchronous GPU programming, resulting in simpler and less error-prone usage for new CUDA developers. Unfortunately, this improvement in user experience comes at a performance cost that often frustrates more experienced CUDA programmers. Prior to this release, the only synchronous-to-asynchronous migration path for existing Thrust codebases involved significant refactoring, replacing calls to `thrust` algorithms with a limited set of `future`-based `thrust::async` algorithms or lower-level CUB kernels. The new `thrust::cuda::par_nosync` execution policy provides a new, less-invasive entry point for asynchronous computation. `par_nosync` is a hint to the Thrust execution engine that any non-essential internal synchronizations should be skipped and that an explicit synchronization will be performed by the caller before accessing results. While some Thrust algorithms require internal synchronization to safely compute their results, many do not. For example, multiple `thrust::for_each` invocations can be launched without waiting for earlier calls to complete: ```cpp // Queue three `for_each` kernels: thrust::for_each(thrust::cuda::par_nosync, vec1.begin(), vec1.end(), Op{}); thrust::for_each(thrust::cuda::par_nosync, vec2.begin(), vec2.end(), Op{}); thrust::for_each(thrust::cuda::par_nosync, vec3.begin(), vec3.end(), Op{}); // Do other work while kernels execute: do_something(); // Must explicitly synchronize before accessing `for_each` results: cudaDeviceSynchronize(); ``` Thanks to @fkallen for this contribution.
### Deprecation Notices #### CUDA Dynamic Parallelism Support **A future version of Thrust will remove support for CUDA Dynamic Parallelism (CDP).** This will only affect calls to Thrust algorithms made from CUDA device-side code that currently launches a kernel; such calls will instead execute sequentially on the calling GPU thread instead of launching a device-wide kernel. ### Breaking Changes - Thrust 1.14.0 included a change that aliased the `cub` namespace to `thrust::cub`. This has caused issues with ambiguous namespaces for projects that declare `using namespace thrust;` from the global namespace. We recommend against this practice. - NVIDIA/thrust#1572: Removed several unnecessary header includes. Downstream projects may need to update their includes if they were relying on this behavior. ### New Features - NVIDIA/thrust#1568: Add `thrust::cuda::par_nosync` policy. Thanks to @fkallen for this contribution. ### Enhancements - NVIDIA/thrust#1511: Use CUB’s new `DeviceMergeSort` API and remove Thrust’s internal implementation. - NVIDIA/thrust#1566: Improved performance of `thrust::shuffle`. Thanks to @djns99 for this contribution. - NVIDIA/thrust#1584: Support user-defined `CMAKE_INSTALL_INCLUDEDIR` values in Thrust’s CMake install rules. Thanks to @robertmaynard for this contribution. ### Bug Fixes - NVIDIA/thrust#1496: Fix some issues affecting `icc` builds. - NVIDIA/thrust#1552: Fix some collisions with the `min`/`max` macros defined in `windows.h`. - NVIDIA/thrust#1582: Fix issue with function type alias on 32-bit MSVC builds. - NVIDIA/thrust#1591: Workaround issue affecting compilation with `nvc++`. - NVIDIA/thrust#1597: Fix some collisions with the `small` macro defined in `windows.h`. - NVIDIA/thrust#1599, NVIDIA/thrust#1603: Fix some issues with version handling in Thrust’s CMake packages. - NVIDIA/thrust#1614: Clarify that scan algorithm results are non-deterministic for pseudo-associative operators (e.g. floating-point addition). 
## Thrust 1.15.0 ### Summary Thrust 1.15.0 provides numerous bugfixes, including non-numeric `thrust::sequence` support, several MSVC-related compilation fixes, fewer conversion warnings, `counting_iterator` initialization, and documentation updates. ### Deprecation Notices **A future version of Thrust will remove support for CUDA Dynamic Parallelism (CDP).** This will only affect calls to Thrust algorithms made from CUDA device-side code that currently launches a kernel; such calls will instead execute sequentially on the calling GPU thread instead of launching a device-wide kernel. ### Bug Fixes - NVIDIA/thrust#1507: Allow `thrust::sequence` to work with non-numeric types. Thanks to Ben Jude (@bjude) for this contribution. - NVIDIA/thrust#1509: Avoid macro collision when calling `max()` on MSVC. Thanks to Thomas (@tomintheshell) for this contribution. - NVIDIA/thrust#1514: Initialize all members in `counting_iterator`'s default constructor. - NVIDIA/thrust#1518: Fix `std::allocator_traits` on MSVC + C++17. - NVIDIA/thrust#1530: Fix several `-Wconversion` warnings. Thanks to Matt Stack (@matt-stack) for this contribution. - NVIDIA/thrust#1539: Fixed typo in `thrust::for_each` documentation. Thanks to Salman (@untamedImpala) for this contribution. - NVIDIA/thrust#1548: Avoid name collision with `B0` macro in termios.h system header. Thanks to Philip Deegan (@PhilipDeegan) for this contribution. ## Thrust 1.14.0 (NVIDIA HPC SDK 21.9) Thrust 1.14.0 is a major release accompanying the NVIDIA HPC SDK 21.9. This release adds the ability to wrap the `thrust::` namespace in an external namespace, providing a workaround for a variety of shared library linking issues. Thrust also learned to detect when CUB's symbols are in a wrapped namespace and properly import them. To enable this feature, use `#define THRUST_CUB_WRAPPED_NAMESPACE foo` to wrap both Thrust and CUB in the `foo::` namespace. See `thrust/detail/config/namespace.h` for details and more namespace options. 
Several bugfixes are also included: The `tuple_size` and `tuple_element` helpers now support cv-qualified types. `scan_by_key` uses less memory. `thrust::iterator_traits` is better integrated with `std::iterator_traits`. See below for more details and references. ### Breaking Changes - Thrust 1.14.0 included a change that aliased the `cub` namespace to `thrust::cub`. This has caused issues with ambiguous namespaces for projects that declare `using namespace thrust;` from the global namespace. We recommend against this practice. ### New Features - NVIDIA/thrust#1464: Add preprocessor hooks that allow `thrust::` to be wrapped in an external namespace, and support cases when CUB is wrapped in an external namespace. ### Bug Fixes - NVIDIA/thrust#1457: Support cv-qualified types in `thrust::tuple_size` and `thrust::tuple_element`. Thanks to Jake Hemstad for this contribution. - NVIDIA/thrust#1471: Fixed excessive memory allocation in `scan_by_key`. Thanks to Lilo Huang for this contribution. - NVIDIA/thrust#1476: Removed dead code from the `expand` example. Thanks to Lilo Huang for this contribution. - NVIDIA/thrust#1488: Fixed the path to the installed CUB headers in the CMake `find_package` configuration files. - NVIDIA/thrust#1491: Fallback to `std::iterator_traits` when no `thrust::iterator_traits` specialization exists for an iterator type. Thanks to Divye Gala for this contribution. ## Thrust 1.13.1 (CUDA Toolkit 11.5) Thrust 1.13.1 is a minor release accompanying the CUDA Toolkit 11.5. This release provides a new hook for embedding the `thrust::` namespace inside a custom namespace. This is intended to work around various issues related to linking multiple shared libraries that use Thrust. The existing `CUB_NS_PREFIX` and `CUB_NS_POSTFIX` macros already provided this capability for CUB; this update provides a simpler mechanism that is extended to and integrated with Thrust. 
Simply define `THRUST_CUB_WRAPPED_NAMESPACE` to a namespace name, and both `thrust::` and `cub::` will be placed inside the new namespace. Using different wrapped namespaces for each shared library will prevent issues like those reported in NVIDIA/thrust#1401. ### New Features - NVIDIA/thrust#1464: Add `THRUST_CUB_WRAPPED_NAMESPACE` hooks. ### Bug Fixes - NVIDIA/thrust#1488: Fix path to installed CUB in Thrust's CMake config files. ## Thrust 1.13.0 (NVIDIA HPC SDK 21.7) Thrust 1.13.0 is the major release accompanying the NVIDIA HPC SDK 21.7 release. Notable changes include `bfloat16` radix sort support (via `thrust::sort`) and memory handling fixes in the `reserve` method of Thrust's vectors. The `CONTRIBUTING.md` file has been expanded to include instructions for building CUB as a component of Thrust, and API documentation now refers to [cppreference](https://cppreference.com) instead of SGI's old STL reference. ### Breaking Changes - NVIDIA/thrust#1459: Remove deprecated aliases `thrust::host_space_tag` and `thrust::device_space_tag`. Use the equivalent `thrust::host_system_tag` and `thrust::device_system_tag` instead. ### New Features - NVIDIA/cub#306: Add radix-sort support for `bfloat16` in `thrust::sort`. Thanks to Xiang Gao (@zasdfgbnm) for this contribution. - NVIDIA/thrust#1423: `thrust::transform_iterator` now supports non-copyable types. Thanks to Jake Hemstad (@jrhemstad) for this contribution. - NVIDIA/thrust#1459: Introduce a new `THRUST_IGNORE_DEPRECATED_API` macro that disables deprecation warnings on Thrust and CUB APIs. ### Bug Fixes - NVIDIA/cub#277: Fixed sanitizer warnings when `thrust::sort` calls into `cub::DeviceRadixSort`. Thanks to Andy Adinets (@canonizer) for this contribution. - NVIDIA/thrust#1442: Reduce extraneous comparisons in `thrust::sort`'s merge sort implementation. - NVIDIA/thrust#1447: Fix memory leak and avoid overallocation when calling `reserve` on Thrust's vector containers. 
Thanks to Kai Germaschewski (@germasch) for this contribution. ### Other Enhancements - NVIDIA/thrust#1405: Update links to standard C++ documentation from SGI to cppreference. Thanks to Muhammad Adeel Hussain (@AdeilH) for this contribution. - NVIDIA/thrust#1432: Updated build instructions in `CONTRIBUTING.md` to include details on building CUB's test suite as part of Thrust. ## Thrust 1.12.1 (CUDA Toolkit 11.4) Thrust 1.12.1 is a trivial patch release that slightly changes the phrasing of a deprecation message. ## Thrust 1.12.0 (NVIDIA HPC SDK 21.3) Thrust 1.12.0 is the major release accompanying the NVIDIA HPC SDK 21.3 and the CUDA Toolkit 11.4. It includes a new `thrust::universal_vector`, which holds data that is accessible from both host and device. This allows users to easily leverage CUDA's unified memory with Thrust. New asynchronous `thrust::async::exclusive_scan` and `inclusive_scan` algorithms have been added, and the synchronous versions of these have been updated to use `cub::DeviceScan` directly. CUB radix sort for floating point types is now stable when both +0.0 and -0.0 are present in the input. This affects some usages of `thrust::sort` and `thrust::stable_sort`. Many compilation warnings and subtle overflow bugs were fixed in the device algorithms, including a long-standing bug that returned invalid temporary storage requirements when `num_items` was close to (but not exceeding) `INT32_MAX`. This release deprecates support for Clang < 7.0 and MSVC < 2019 (aka 19.20/16.0/14.20). ### Breaking Changes - NVIDIA/thrust#1372: Deprecate Clang < 7 and MSVC < 2019. - NVIDIA/thrust#1376: Standardize `thrust::scan_by_key` functors / accumulator types. This may change the results from `scan_by_key` when input, output, and initial value types are not the same type. ### New Features - NVIDIA/thrust#1251: Add two new `thrust::async::` algorithms: `inclusive_scan` and `exclusive_scan`.
- NVIDIA/thrust#1334: Add `thrust::universal_vector`, `universal_ptr`, and `universal_allocator`. ### Bug Fixes - NVIDIA/thrust#1347: Qualify calls to `make_reverse_iterator`. - NVIDIA/thrust#1359: Enable stricter warning flags. This fixes several outstanding issues: - NVIDIA/cub#221: Overflow in `temp_storage_bytes` when `num_items` close to (but not over) `INT32_MAX`. - NVIDIA/cub#228: CUB uses non-standard C++ extensions that break strict compilers. - NVIDIA/cub#257: Warning when compiling `GridEvenShare` with unsigned offsets. - NVIDIA/thrust#974: Conversion warnings in `thrust::transform_reduce`. - NVIDIA/thrust#1091: Conversion warnings in `thrust::counting_iterator`. - NVIDIA/thrust#1373: Fix compilation error when a standard library type is wrapped in `thrust::optional`. Thanks to Vukasin Milovanovic for this contribution. - NVIDIA/thrust#1388: Fix `signbit(double)` implementation on MSVC. - NVIDIA/thrust#1389: Support building Thrust tests without CUDA enabled. ### Other Enhancements - NVIDIA/thrust#1304: Use `cub::DeviceScan` to implement `thrust::exclusive_scan` and `thrust::inclusive_scan`. - NVIDIA/thrust#1362, NVIDIA/thrust#1370: Update smoke test naming. - NVIDIA/thrust#1380: Fix typos in `set_operation` documentation. Thanks to Hongyu Cai for this contribution. - NVIDIA/thrust#1383: Include FreeBSD license in LICENSE.md for `thrust::complex` implementation. - NVIDIA/thrust#1384: Add missing precondition to `thrust::gather` documentation. ## Thrust 1.11.0 (CUDA Toolkit 11.3) Thrust 1.11.0 is a major release providing bugfixes and performance enhancements. It includes a new sort algorithm that provides up to 2x more performance from `thrust::sort` when used with certain key types and hardware. The new `thrust::shuffle` algorithm has been tweaked to improve the randomness of the output. 
Our CMake package and build system continue to see improvements with better `add_subdirectory` support, installation rules, status messages, and other features that make Thrust easier to use from CMake projects. The release includes several other bugfixes and modernizations, and received updates from 12 contributors. ### New Features - NVIDIA/cub#204: New implementation for `thrust::sort` on CUDA when using 32/64-bit numeric keys on Pascal and up (SM60+). This improved radix sort algorithm provides up to 2x more performance. Thanks to Andy Adinets for this contribution. - NVIDIA/thrust#1310, NVIDIA/thrust#1312: Various tuple-related APIs have been updated to use variadic templates. Thanks to Andrew Corrigan for these contributions. - NVIDIA/thrust#1297: Optionally add install rules when included with CMake's `add_subdirectory`. Thanks to Kai Germaschewski for this contribution. ### Bug Fixes - NVIDIA/thrust#1309: Fix `thrust::shuffle` to produce better quality random distributions. Thanks to Rory Mitchell and Daniel Stokes for this contribution. - NVIDIA/thrust#1337: Fix compile-time regression in `transform_inclusive_scan` and `transform_exclusive_scan`. - NVIDIA/thrust#1306: Fix binary search `middle` calculation to avoid overflows. Thanks to Richard Barnes for this contribution. - NVIDIA/thrust#1314: Use `size_t` for the index type parameter in `thrust::tuple_element`. Thanks to Andrew Corrigan for this contribution. - NVIDIA/thrust#1329: Fix runtime error when copying an empty `thrust::device_vector` in MSVC Debug builds. Thanks to Ben Jude for this contribution. - NVIDIA/thrust#1323: Fix and add test for CMake package install rules. Thanks to Keith Kraus and Kai Germaschewski for testing and discussion. - NVIDIA/thrust#1338: Fix GCC version checks in `thrust::detail::is_pod` implementation. Thanks to Anatoliy Tomilov for this contribution. - NVIDIA/thrust#1289: Partial fixes for Clang 10 as host compiler.
Filed an NVCC bug that will be fixed in a future version of the CUDA Toolkit (NVBug 3136307). - NVIDIA/thrust#1272: Fix ambiguous `iter_swap` call when using `thrust::partition` with STL containers. Thanks to Isaac Deutsch for this contribution. - NVIDIA/thrust#1281: Update our bundled `FindTBB.cmake` module to support latest MSVC. - NVIDIA/thrust#1298: Use semantic versioning rules for our CMake package's compatibility checks. Thanks to Kai Germaschewski for this contribution. - NVIDIA/thrust#1300: Use `FindPackageHandleStandardArgs` to print standard status messages when our CMake package is found. Thanks to Kai Germaschewski for this contribution. - NVIDIA/thrust#1320: Use feature-testing instead of a language dialect check for `thrust::remove_cvref`. Thanks to Andrew Corrigan for this contribution. - NVIDIA/thrust#1319: Suppress GPU deprecation warnings. ### Other Enhancements - NVIDIA/cub#213: Removed some tuning policies for unsupported hardware (` specialization. - The `thrust::intermediate_type_from_function_and_iterators` helper is no longer needed and has been removed. - NVIDIA/thrust#1255: Always use `cudaStreamSynchronize` instead of `cudaDeviceSynchronize` if the execution policy has a stream attached to it. Thanks to Rong Ou for this contribution. - NVIDIA/thrust#1201: Tests for correct handling of legacy and per-thread default streams. Thanks to Rong Ou for this contribution. ### Bug Fixes - NVIDIA/thrust#1260: Fix `thrust::transform_inclusive_scan` with heterogeneous types. Thanks to Rong Ou for this contribution. - NVIDIA/thrust#1258, NVC++ FS #28463: Ensure the CUDA radix sort backend synchronizes before returning; otherwise, copies from temporary storage will race with destruction of said temporary storage. - NVIDIA/thrust#1264: Evaluate `CUDA_CUB_RET_IF_FAIL` macro argument only once. Thanks to Jason Lowe for this contribution. - NVIDIA/thrust#1262: Add missing `` header. 
- NVIDIA/thrust#1250: Restore some `THRUST_DECLTYPE_RETURNS` macros in async test implementations. - NVIDIA/thrust#1249: Use `std::iota` in `CUDATestDriver::target_devices`. Thanks to Michael Francis for this contribution. - NVIDIA/thrust#1244: Check for macro collisions with system headers during header testing. - NVIDIA/thrust#1224: Remove unnecessary SFINAE contexts from asynchronous algorithms. - NVIDIA/thrust#1190: Make `out_of_memory_recovery` test trigger faster. - NVIDIA/thrust#1187: Eliminate superfluous iterators specific to the CUDA backend. - NVIDIA/thrust#1181: Various fixes for GoUDA. Thanks to Andrei Tchouprakov for this contribution. - NVIDIA/thrust#1178, NVIDIA/thrust#1229: Use transparent functionals in placeholder expressions, fixing issues with `thrust::device_reference` and placeholder expressions and `thrust::find` with asymmetric equality operators. - NVIDIA/thrust#1153: Switch to placement new instead of assignment to construct items in uninitialized memory. Thanks to Hugh Winkler for this contribution. - NVIDIA/thrust#1050: Fix compilation of asynchronous algorithms when RDC is enabled. - NVIDIA/thrust#1042: Correct return type of `thrust::detail::predicate_to_integral` from `bool` to `IntegralType`. Thanks to Andreas Hehn for this contribution. - NVIDIA/thrust#1009: Avoid returning uninitialized allocators. Thanks to Zhihao Yuan for this contribution. - NVIDIA/thrust#990: Add missing `` include to ``. Thanks to Robert Maynard for this contribution. - NVIDIA/thrust#966: Fix spurious MSVC conversion with loss of data warning in sort algorithms. Thanks to Zhihao Yuan for this contribution. - Add more metadata to mock specializations for testing iterator in `testing/copy.cu`. - Add missing include to shuffle unit test. - Specialize `thrust::wrapped_function` for `void` return types because MSVC is not a fan of the pattern `return static_cast<void>(expr);`. - Replace deprecated `tbb/tbb_thread.h` with `<thread>`.
- Fix overcounting of initial value in TBB scans. - Use `thrust::advance` instead of `+=` for generic iterators. - Wrap the OMP flags in `-Xcompiler` for NVCC - Extend `ASSERT_STATIC_ASSERT` skip for the OMP backend. - Add missing header caught by `tbb.cuda` configs. - Fix "unsafe API" warnings in examples on MSVC: `s/fopen/fstream/` - Various C++17 fixes. ## Thrust 1.9.10-1 (NVIDIA HPC SDK 20.7, CUDA Toolkit 11.1) Thrust 1.9.10-1 is the minor release accompanying the NVIDIA HPC SDK 20.7 release and the CUDA Toolkit 11.1 release. ### Bug Fixes - #1214, NVBug 200619442: Stop using `std::allocator` APIs deprecated in C++17. - #1216, NVBug 200540293: Make `thrust::optional` work with Clang when used with older libstdc++. - #1207, NVBug 200618218: Don't force C++14 with older compilers that don't support it. - #1218: Wrap includes of `` and `` to avoid circular inclusion with NVC++. ## Thrust 1.9.10 (NVIDIA HPC SDK 20.5) Thrust 1.9.10 is the release accompanying the NVIDIA HPC SDK 20.5 release. It adds CMake support for compilation with NVC++ and a number of minor bug fixes for NVC++. It also adds CMake `find_package` support, which replaces the broken 3rd-party legacy `FindThrust.cmake` script. C++03, C++11, GCC < 5, Clang < 6, and MSVC < 2017 are now deprecated. Starting with the upcoming 1.10.0 release, C++03 support will be dropped entirely. ### Breaking Changes - #1082: Thrust now checks that it is compatible with the version of CUB found in your include path, generating an error if it is not. If you are using your own version of CUB, it may be too old. It is recommended to simply delete your own version of CUB and use the version of CUB that comes with Thrust. - #1089: C++03 and C++11 are deprecated. Using these dialects will generate a compile-time warning. 
These warnings can be suppressed by defining `THRUST_IGNORE_DEPRECATED_CPP_DIALECT` (to suppress C++03 and C++11 deprecation warnings) or `THRUST_IGNORE_DEPRECATED_CPP_11` (to suppress C++11 deprecation warnings). Suppression is only a short term solution. We will be dropping support for C++03 in the 1.10.0 release and C++11 in the near future. - #1089: GCC < 5, Clang < 6, and MSVC < 2017 are deprecated. Using these compilers will generate a compile-time warning. These warnings can be suppressed by defining `THRUST_IGNORE_DEPRECATED_COMPILER`. Suppression is only a short term solution. We will be dropping support for these compilers in the near future. ### New Features - #1130: CMake `find_package` support. This is significant because there is a legacy `FindThrust.cmake` script authored by a third party in widespread use in the community which has a bug in how it parses Thrust version numbers which will cause it to incorrectly parse 1.9.10. This script only handles the first digit of each part of the Thrust version number correctly: for example, Thrust 17.17.17 would be interpreted as Thrust 1.1.1701717. You can find directions for using the new CMake `find_package` support and migrating away from the legacy `FindThrust.cmake` [here](https://github.com/NVIDIA/thrust/blob/main/thrust/cmake/README.md). - #1129: Added `thrust::detail::single_device_tls_caching_allocator`, a convenient way to get an MR caching allocator for device memory, which is used by NVC++. ### Other Enhancements - #1129: Refactored RDC handling in CMake to be a global option and not create two targets for each example and test. ### Bug Fixes - #1129: Fix the legacy `thrust::return_temporary_buffer` API to support passing a size. This was necessary to enable usage of Thrust caching MR allocators with synchronous Thrust algorithms.
This change has allowed NVC++’s C++17 Parallel Algorithms implementation to switch to use Thrust caching MR allocators for device temporary storage, which gives a 2x speedup on large multi-GPU systems such as V100 and A100 DGX where `cudaMalloc` is very slow. - #1128: Respect `CUDA_API_PER_THREAD_DEFAULT_STREAM`. Thanks to Rong Ou for this contribution. - #1131: Fix the one-policy overload of `thrust::async::copy` to not copy the policy, resolving use-after-move issues. - #1145: When cleaning up type names in `unittest::base_class_name`, only call `std::string::replace` if we found the substring we are looking to replace. - #1139: Don't use `cxx::__demangle` in NVC++. - #1102: Don't use `thrust::detail::normal_distribution_nvcc` for Feta because it uses `erfcinv`, a non-standard function that Feta doesn't have. ## Thrust 1.9.9 (CUDA Toolkit 11.0) Thrust 1.9.9 adds support for NVC++, which uses Thrust to implement GPU-accelerated C++17 Parallel Algorithms. `thrust::zip_function` and `thrust::shuffle` were also added. C++03, C++11, GCC < 5, Clang < 6, and MSVC < 2017 are now deprecated. Starting with the upcoming 1.10.0 release, C++03 support will be dropped entirely. All other deprecated platforms will be dropped in the near future. ### Breaking Changes - #1082: Thrust now checks that it is compatible with the version of CUB found in your include path, generating an error if it is not. If you are using your own version of CUB, it may be too old. It is recommended to simply delete your own version of CUB and use the version of CUB that comes with Thrust. - #1089: C++03 and C++11 are deprecated. Using these dialects will generate a compile-time warning. These warnings can be suppressed by defining `THRUST_IGNORE_DEPRECATED_CPP_DIALECT` (to suppress C++03 and C++11 deprecation warnings) or `THRUST_IGNORE_DEPRECATED_CPP_11` (to suppress C++11 deprecation warnings). Suppression is only a short term solution.
We will be dropping support for C++03 in the 1.10.0 release and C++11 in the near future. - #1089: GCC < 5, Clang < 6, and MSVC < 2017 are deprecated. Using these compilers will generate a compile-time warning. These warnings can be suppressed by defining `THRUST_IGNORE_DEPRECATED_COMPILER`. Suppression is only a short term solution. We will be dropping support for these compilers in the near future. ### New Features - #1086: Support for NVC++ aka "Feta". The most significant change is in how we use `__CUDA_ARCH__`. Now, there are four macros that must be used: - `THRUST_IS_DEVICE_CODE`, which should be used in an `if` statement around device-only code. - `THRUST_INCLUDE_DEVICE_CODE`, which should be used in an `#if` preprocessor directive inside of the `if` statement mentioned in the prior bullet. - `THRUST_IS_HOST_CODE`, which should be used in an `if` statement around host-only code. - `THRUST_INCLUDE_HOST_CODE`, which should be used in an `#if` preprocessor directive inside of the `if` statement mentioned in the prior bullet. - #1085: `thrust::shuffle`. Thanks to Rory Mitchell for this contribution. - #1029: `thrust::zip_function`, a facility for zipping functions that take N parameters instead of a tuple of N parameters as `thrust::zip_iterator` does. Thanks to Ben Jude for this contribution. - #1068: `thrust::system::cuda::managed_memory_pointer`, a universal memory strongly typed pointer compatible with the ISO C++ Standard Library. ### Other Enhancements - #1029: Thrust is now built and tested with NVCC warnings treated as errors. - #1029: MSVC C++11 support. - #1029: `THRUST_DEPRECATED` abstraction for generating compile-time deprecation warning messages. - #1029: `thrust::pointer::pointer_to(reference)`. - #1070: Unit test for `thrust::inclusive_scan` with a user defined types. Thanks to Conor Hoekstra for this contribution. ### Bug Fixes - #1088: Allow `thrust::replace` to take functions that have non-`const` `operator()`. 
- #1094: Add missing `constexpr` to `par_t` constructors. Thanks to Patrick Stotko for this contribution. - #1077: Remove `__device__` from CUDA MR-based device allocators to fix obscure "host function called from host device function" warning that occurs when you use the new Thrust MR-based allocators. - #1029: Remove inconsistently-used `THRUST_BEGIN`/`END_NS` macros. - #1029: Fix C++ dialect detection on newer MSVC. - #1029: Use `_Pragma`/`__pragma` instead of `#pragma` in macros. - #1029: Replace raw `__cplusplus` checks with the appropriate Thrust macros. - #1105: Add a missing `` include. - #1103: Fix regression of `thrust::detail::temporary_allocator` with non-CUDA back ends. - #1111: Use Thrust's random number engine instead of `std::`s in device code. - #1108: Get rid of a GCC 9 warning about deprecated generation of copy ctors. ## Thrust 1.9.8-1 (NVIDIA HPC SDK 20.3) Thrust 1.9.8-1 is a variant of 1.9.8 accompanying the NVIDIA HPC SDK 20.3 release. It contains modifications necessary to serve as the implementation of NVC++'s GPU-accelerated C++17 Parallel Algorithms when using the CUDA Toolkit 11.0 release. ## Thrust 1.9.8 (CUDA Toolkit 11.0 Early Access) Thrust 1.9.8, which is included in the CUDA Toolkit 11.0 release, removes Thrust's internal derivative of CUB, upstreams all relevant changes to CUB, and adds CUB as a Git submodule. It will now be necessary to do `git clone --recursive` when checking out Thrust, and to update the CUB submodule when pulling in new Thrust changes. Additionally, CUB is now included as a first class citizen in the CUDA toolkit. Thrust 1.9.8 also fixes bugs preventing most Thrust algorithms from working with more than `2^31-1` elements. Now, `thrust::reduce`, `thrust::*_scan`, and related algorithms (aka most of Thrust) work with large element counts. ### Breaking Changes - Thrust will now use the version of CUB in your include path instead of its own internal copy.
If you are using your own version of CUB, it may be older and incompatible with Thrust. It is recommended to simply delete your own version of CUB and use the version of CUB that comes with Thrust. ### Other Enhancements - Refactor Thrust and CUB to support 64-bit indices in most algorithms. In most cases, Thrust now selects between kernels that use 32-bit indices and 64-bit indices at runtime depending on the size of the input. This means large element counts work, but small element counts do not have to pay for the register usage of 64-bit indices if they are not needed. Now, `thrust::reduce`, `thrust::*_scan`, and related algorithms (aka most of Thrust) work with more than `2^31-1` elements. Notably, `thrust::sort` is still limited to less than `2^31-1` elements. - CUB is now a submodule and the internal copy of CUB has been removed. - #1051: Stop specifying the `__launch_bounds__` minimum blocks parameter because it messes up register allocation and increases register pressure, and we don't actually know at compile time how many blocks we will use (aside from single tile kernels). ### Bug Fixes - #1020: After making a CUDA API call, always clear the global CUDA error state by calling `cudaGetLastError`. - #1021: Avoid calling destroy in the destructor of a Thrust vector if the vector is empty. - #1046: Actually throw `thrust::bad_alloc` when `thrust::system::cuda::malloc` fails instead of just constructing a temporary and doing nothing with it. - Add missing copy constructor or copy assignment operator to all classes that GCC 9's `-Wdeprecated-copy` complains about. - Add missing move operations to `thrust::system::cuda::vector`. - #1015: Check that the backend is CUDA before using CUDA-specifics in `thrust::detail::temporary_allocator`. Thanks to Hugh Winkler for this contribution. - #1055: More correctly detect the presence of aligned/sized `new`/`delete`. - #1043: Fix ill-formed specialization of `thrust::system::is_error_code_enum` for `thrust::event_errc`.
Thanks to Toru Niina for this contribution. - #1027: Add tests for `thrust::tuple_for_each` and `thrust::tuple_subset`. Thanks to Ben Jude for this contribution. - #1027: Use correct macro in `thrust::tuple_for_each`. Thanks to Ben Jude for this contribution. - #1026: Use correct MSVC version formatting in CMake. Thanks to Ben Jude for this contribution. - Workaround an NVCC issue with type aliases with template template arguments containing a parameter pack. - Remove unused functions from the CUDA backend which call slow CUDA attribute query APIs. - Replace `CUB_RUNTIME_FUNCTION` with `THRUST_RUNTIME_FUNCTION`. - Correct typo in `thrust::transform` documentation. Thanks to Eden Yefet for this contribution. ### Known Issues - `thrust::sort` remains limited to `2^31-1` elements for now. ## Thrust 1.9.7-1 (CUDA Toolkit 10.2 for Tegra) Thrust 1.9.7-1 is a minor release accompanying the CUDA Toolkit 10.2 release for Tegra. It is nearly identical to 1.9.7. ### Bug Fixes - Remove support for GCC's broken nodiscard-like attribute. ## Thrust 1.9.7 (CUDA Toolkit 10.2) Thrust 1.9.7 is a minor release accompanying the CUDA Toolkit 10.2 release. Unfortunately, although the version and patch numbers are identical, one bug fix present in Thrust 1.9.7 (NVBug 2646034: Fix incorrect dependency handling for stream acquisition in `thrust::future`) was not included in the CUDA Toolkit 10.2 preview release for AArch64 SBSA. The tag `cuda-10.2aarch64sbsa` contains the exact version of Thrust present in the CUDA Toolkit 10.2 preview release for AArch64 SBSA. ### Bug Fixes - #967, NVBug 2448170: Fix the CUDA backend `thrust::for_each` so that it supports large input sizes with 64-bit indices. - NVBug 2646034: Fix incorrect dependency handling for stream acquisition in `thrust::future`. - Not present in the CUDA Toolkit 10.2 preview release for AArch64 SBSA. - #968, NVBug 2612102: Fix the `thrust::mr::polymorphic_adaptor` to actually use its template parameter. 
## Thrust 1.9.6-1 (NVIDIA HPC SDK 20.3) Thrust 1.9.6-1 is a variant of 1.9.6 accompanying the NVIDIA HPC SDK 20.3 release. It contains modifications necessary to serve as the implementation of NVC++'s GPU-accelerated C++17 Parallel Algorithms when using the CUDA Toolkit 10.1 Update 2 release. ## Thrust 1.9.6 (CUDA Toolkit 10.1 Update 2) Thrust 1.9.6 is a minor release accompanying the CUDA Toolkit 10.1 Update 2 release. ### Bug Fixes - NVBug 2509847: Inconsistent alignment of `thrust::complex` - NVBug 2586774: Compilation failure with Clang + older libstdc++ that doesn't have `std::is_trivially_copyable` - NVBug 200488234: CUDA header files contain Unicode characters which leads to compilation errors on Windows - #949, #973, NVBug 2422333, NVBug 2522259, NVBug 2528822: `thrust::detail::aligned_reinterpret_cast` must be annotated with `__host__ __device__`. - NVBug 2599629: Missing include in the OpenMP sort implementation - NVBug 200513211: Truncation warning in test code under VC142 ## Thrust 1.9.5 (CUDA Toolkit 10.1 Update 1) Thrust 1.9.5 is a minor release accompanying the CUDA Toolkit 10.1 Update 1 release. ### Bug Fixes - NVBug 2502854: Fixed assignment of `thrust::device_vector>` between host and device. ## Thrust 1.9.4 (CUDA Toolkit 10.1) Thrust 1.9.4 adds asynchronous interfaces for parallel algorithms, a new allocator system including caching allocators and unified memory support, as well as a variety of other enhancements, mostly related to C++11/C++14/C++17/C++20 support. The new asynchronous algorithms in the `thrust::async` namespace return `thrust::event` or `thrust::future` objects, which can be waited upon to synchronize with the completion of the parallel operation. ### Breaking Changes Synchronous Thrust algorithms now block until all of their operations have completed. Use the new asynchronous Thrust algorithms for non-blocking behavior.
### New Features - `thrust::event` and `thrust::future`, uniquely-owned asynchronous handles consisting of a state (ready or not ready), content (some value; for `thrust::future` only), and an optional set of objects that should be destroyed only when the future's value is ready and has been consumed. - The design is loosely based on C++11's `std::future`. - They can be `.wait`'d on, and the value of a future can be waited on and retrieved with `.get` or `.extract`. - Multiple `thrust::event`s and `thrust::future`s can be combined with `thrust::when_all`. - `thrust::future`s can be converted to `thrust::event`s. - Currently, these primitives are only implemented for the CUDA backend and are C++11 only. - New asynchronous algorithms that return `thrust::event`/`thrust::future`s, implemented as C++20 range style customization points: - `thrust::async::reduce`. - `thrust::async::reduce_into`, which takes a target location to store the reduction result into. - `thrust::async::copy`, including a two-policy overload that allows explicit cross system copies which execution policy properties can be attached to. - `thrust::async::transform`. - `thrust::async::for_each`. - `thrust::async::stable_sort`. - `thrust::async::sort`. - By default the asynchronous algorithms use the new caching allocators. Deallocation of temporary storage is deferred until the destruction of the returned `thrust::future`. The content of `thrust::future`s is stored in either device or universal memory and transferred to the host only upon request to prevent unnecessary data migration. - Asynchronous algorithms are currently only implemented for the CUDA system and are C++11 only. - `exec.after(f, g, ...)`, a new execution policy method that takes a set of `thrust::event`/`thrust::future`s and returns an execution policy that operations on that execution policy should depend upon. 
- New logic and mindset for the type requirements for cross-system sequence copies (currently only used by `thrust::async::copy`), based on: - `thrust::is_contiguous_iterator` and `THRUST_PROCLAIM_CONTIGUOUS_ITERATOR` for detecting/indicating that an iterator points to contiguous storage. - `thrust::is_trivially_relocatable` and `THRUST_PROCLAIM_TRIVIALLY_RELOCATABLE` for detecting/indicating that a type is `memcpy`able (based on principles from [P1144](https://wg21.link/P1144)). - The new approach reduces buffering, increases performance, and increases correctness. - The fast path is now enabled when copying CUDA `__half` and vector types with `thrust::async::copy`. - All Thrust synchronous algorithms for the CUDA backend now actually synchronize. Previously, any algorithm that did not allocate temporary storage (counterexample: `thrust::sort`) and did not have a computation-dependent result (counterexample: `thrust::reduce`) would actually be launched asynchronously. Additionally, synchronous algorithms that allocated temporary storage would become asynchronous if a custom allocator was supplied that did not synchronize on allocation/deallocation, unlike `cudaMalloc`/`cudaFree`. So, now `thrust::for_each`, `thrust::transform`, `thrust::sort`, etc are truly synchronous. In some cases this may be a performance regression; if you need asynchrony, use the new asynchronous algorithms. - Thrust's allocator framework has been rewritten. It now uses a memory resource system, similar to C++17's `std::pmr` but supporting static polymorphism. Memory resources are objects that allocate untyped storage and allocators are cheap handles to memory resources in this new model. The new facilities live in ``. - `thrust::mr::memory_resource`, the memory resource base class, which takes a (possibly tagged) pointer to `void` type as a parameter. - `thrust::mr::allocator`, an allocator backed by a memory resource object. 
- `thrust::mr::polymorphic_adaptor_resource`, a type-erased memory resource adaptor. - `thrust::mr::polymorphic_allocator`, a C++17-style polymorphic allocator backed by a type-erased memory resource object. - New tunable C++17-style caching memory resources, `thrust::mr::(disjoint_)?(un)?synchronized_pool_resource`, designed to cache both small object allocations and large repetitive temporary allocations. The disjoint variants use separate storage for management of the pool, which is necessary if the memory being allocated cannot be accessed on the host (e.g. device memory). - System-specific allocators were rewritten to use the new memory resource framework. - New `thrust::device_memory_resource` for allocating device memory. - New `thrust::universal_memory_resource` for allocating memory that can be accessed from both the host and device (e.g. `cudaMallocManaged`). - New `thrust::universal_host_pinned_memory_resource` for allocating memory that can be accessed from the host and the device but always resides in host memory (e.g. `cudaMallocHost`). - `thrust::get_per_device_resource` and `thrust::per_device_allocator`, which lazily create and retrieve a per-device singleton memory resource. - Rebinding mechanisms (`rebind_traits` and `rebind_alloc`) for `thrust::allocator_traits`. - `thrust::device_make_unique`, a factory function for creating a `std::unique_ptr` to a newly allocated object in device memory. - ``, a C++11 implementation of the C++17 uninitialized memory algorithms. - `thrust::allocate_unique` and friends, based on the proposed C++23 [`std::allocate_unique`](https://wg21.link/P0211). - New type traits and metaprogramming facilities. Type traits are slowly being migrated out of `thrust::detail::` and ``; their new home will be `thrust::` and ``. - `thrust::is_execution_policy`. - `thrust::is_operator_less_or_greater_function_object`, which detects `thrust::less`, `thrust::greater`, `std::less`, and `std::greater`. 
- `thrust::is_operator_plus_function_object`, which detects `thrust::plus` and `std::plus`. - `thrust::remove_cvref(_t)?`, a C++11 implementation of C++20's `std::remove_cvref(_t)?`. - `thrust::void_t`, and various other new type traits. - `thrust::integer_sequence` and friends, a C++11 implementation of C++14's `std::integer_sequence`. - `thrust::conjunction`, `thrust::disjunction`, and `thrust::negation`, a C++11 implementation of C++17's logical metafunctions. - Some Thrust type traits (such as `thrust::is_constructible`) have been redefined in terms of C++11's type traits when they are available. - ``, new `std::tuple` algorithms: - `thrust::tuple_transform`. - `thrust::tuple_for_each`. - `thrust::tuple_subset`. - Miscellaneous new `std::`-like facilities: - `thrust::optional`, a C++11 implementation of C++17's `std::optional`. - `thrust::addressof`, an implementation of C++11's `std::addressof`. - `thrust::next` and `thrust::prev`, an implementation of C++11's `std::next` and `std::prev`. - `thrust::square`, a `` style unary function object that multiplies its argument by itself. - `` and `thrust::numeric_limits`, a customized version of `` and `std::numeric_limits`. - ``, new general purpose preprocessor facilities: - `THRUST_PP_CAT[2-5]`, concatenates two to five tokens. - `THRUST_PP_EXPAND(_ARGS)?`, performs double expansion. - `THRUST_PP_ARITY` and `THRUST_PP_DISPATCH`, tools for macro overloading. - `THRUST_PP_BOOL`, boolean conversion. - `THRUST_PP_INC` and `THRUST_PP_DEC`, increment/decrement. - `THRUST_PP_HEAD`, a variadic macro that expands to the first argument. - `THRUST_PP_TAIL`, a variadic macro that expands to all its arguments after the first. - `THRUST_PP_IIF`, bitwise conditional. - `THRUST_PP_COMMA_IF`, and `THRUST_PP_HAS_COMMA`, facilities for adding and detecting comma tokens. - `THRUST_PP_IS_VARIADIC_NULLARY`, returns true if called with a nullary `__VA_ARGS__`.
- `THRUST_CURRENT_FUNCTION`, expands to the name of the current function. - New C++11 compatibility macros: - `THRUST_NODISCARD`, expands to `[[nodiscard]]` when available and the best equivalent otherwise. - `THRUST_CONSTEXPR`, expands to `constexpr` when available and the best equivalent otherwise. - `THRUST_OVERRIDE`, expands to `override` when available and the best equivalent otherwise. - `THRUST_DEFAULT`, expands to `= default;` when available and the best equivalent otherwise. - `THRUST_NOEXCEPT`, expands to `noexcept` when available and the best equivalent otherwise. - `THRUST_FINAL`, expands to `final` when available and the best equivalent otherwise. - `THRUST_INLINE_CONSTANT`, expands to `inline constexpr` when available and the best equivalent otherwise. - ``, new C++11-only type deduction helpers: - `THRUST_DECLTYPE_RETURNS*`, expand to function definitions with suitable conditional `noexcept` qualifiers and trailing return types. - `THRUST_FWD(x)`, expands to `::std::forward(x)`. - `THRUST_MVCAP`, expands to a lambda move capture. - `THRUST_RETOF`, expands to a decltype computing the return type of an invocable. - New CMake build system. ### New Examples - `mr_basic` demonstrates how to use the new memory resource allocator system. ### Other Enhancements - Tagged pointer enhancements: - New `thrust::pointer_traits` specialization for `void const*`. - `nullptr` support to Thrust tagged pointers. - New `explicit operator bool` for Thrust tagged pointers when using C++11 for `std::unique_ptr` interoperability. - Added `thrust::reinterpret_pointer_cast` and `thrust::static_pointer_cast` for casting Thrust tagged pointers. - Iterator enhancements: - `thrust::iterator_system` is now SFINAE friendly. - Removed cv qualifiers from iterator types when using `thrust::iterator_system`. - Static assert enhancements: - New `THRUST_STATIC_ASSERT_MSG`, takes an optional string constant to be used as the error message when possible. 
- Update `THRUST_STATIC_ASSERT(_MSG)` to use C++11's `static_assert` when it's available. - Introduce a way to test for static assertions. - Testing enhancements: - Additional scalar and sequence types, including non-builtin types and vectors with unified memory allocators, have been added to the list of types used by generic unit tests. - The generation of random input data has been improved to increase the range of values used and catch more corner cases. - New `unittest::truncate_to_max_representable` utility for avoiding the generation of ranges that cannot be represented by the underlying element type in generic unit test code. - The test driver now synchronizes with CUDA devices and checks for errors after each test, when switching devices, and after each raw kernel launch. - The `warningtester` uber header is now compiled with NVCC to avoid needing to disable CUDA-specific code with the preprocessor. - Fixed the unit test framework's `ASSERT_*` to print `char`s as `int`s. - New `DECLARE_INTEGRAL_VARIABLE_UNITTEST` test declaration macro. - New `DECLARE_VARIABLE_UNITTEST_WITH_TYPES_AND_NAME` test declaration macro. - `thrust::system_error` in the CUDA backend now prints out its `cudaError_t` enumerator in addition to the diagnostic message. - Stopped using conditionally signed types like `char`. ### Bug Fixes - #897, NVBug 2062242: Fix compilation error when using `__device__` lambdas with `thrust::reduce` on MSVC. - #908, NVBug 2089386: Static assert that `thrust::generate`/`thrust::fill` isn't operating on const iterators. - #919: Fix compilation failure with `thrust::zip_iterator` and `thrust::complex`. - #924, NVBug 2096679, NVBug 2315990: Fix dispatch for the CUDA backend's `thrust::reduce` to use two functions (one with the pragma for disabling exec checks, one with `THRUST_RUNTIME_FUNCTION`) instead of one. This fixes a regression with device compilation that started in CUDA Toolkit 9.2.
- #928, NVBug 2341455: Add missing `__host__ __device__` annotations to a `thrust::complex::operator=` to satisfy GoUDA. - NVBug 2094642: Make `thrust::vector_base::clear` not depend on the element type being default constructible. - NVBug 2289115: Remove flaky `simple_cuda_streams` example. - NVBug 2328572: Add missing `thrust::device_vector` constructor that takes an allocator parameter. - NVBug 2455740: Update the `range_view` example to not use device-side launch. - NVBug 2455943: Ensure that sized unit tests that use `thrust::counting_iterator` perform proper truncation. - NVBug 2455952: Refactor questionable `thrust::copy_if` unit tests. ## Thrust 1.9.3 (CUDA Toolkit 10.0) Thrust 1.9.3 unifies and integrates CUDA Thrust and GitHub Thrust. ### Bug Fixes - #725, #850, #855, #859, #860: Unify the `thrust::iter_swap` interface and fix `thrust::device_reference` swapping. - NVBug 2004663: Add a `data` method to `thrust::detail::temporary_array` and refactor temporary memory allocation in the CUDA backend to be exception and leak safe. - #886, #894, #914: Various documentation typo fixes. - #724: Provide `NVVMIR_LIBRARY_DIR` environment variable to NVCC. - #878: Optimize `thrust::min/max_element` to only use `thrust::detail::get_iterator_value` for non-numeric types. - #899: Make `thrust::cuda::experimental::pinned_allocator`'s comparison operators `const`. - NVBug 2092152: Remove all includes of `<cuda.h>`. - #911: Fix default comparator element type for `thrust::merge_by_key`. ### Acknowledgments - Thanks to Andrew Corrigan for contributing fixes for swapping interfaces. - Thanks to Francisco Facioni for contributing optimizations for `thrust::min/max_element`. ## Thrust 1.9.2 (CUDA Toolkit 9.2) Thrust 1.9.2 brings a variety of performance enhancements, bug fixes and test improvements. CUB 1.7.5 was integrated, enhancing the performance of `thrust::sort` on small data types and `thrust::reduce`. Changes were applied to `complex` to optimize memory access.
Thrust now compiles with compiler warnings enabled and treated as errors. Additionally, the unit test suite and framework were enhanced to increase coverage. ### Breaking Changes - The `fallback_allocator` example was removed, as it was buggy and difficult to support. ### New Features - `<thrust/detail/alignment.h>`, utilities for memory alignment: - `thrust::aligned_reinterpret_cast`. - `thrust::aligned_storage_size`, which computes the amount of storage needed for an object of a particular size and alignment. - `thrust::alignment_of`, a C++03 implementation of C++11's `std::alignment_of`. - `thrust::aligned_storage`, a C++03 implementation of C++11's `std::aligned_storage`. - `thrust::max_align_t`, a C++03 implementation of C++11's `std::max_align_t`. ### Bug Fixes - NVBug 200385527, NVBug 200385119, NVBug 200385113, NVBug 200349350, NVBug 2058778: Various compiler warning issues. - NVBug 200355591: `thrust::reduce` performance issues. - NVBug 2053727: Fixed an ADL bug that caused user-supplied `allocate` to be overlooked but `deallocate` to be called with GCC <= 4.3. - NVBug 1777043: Fixed `thrust::complex` to work with `thrust::sequence`. ## Thrust 1.9.1-2 (CUDA Toolkit 9.1) Thrust 1.9.1-2 integrates version 1.7.4 of CUB and introduces a new CUDA backend for `thrust::reduce` based on CUB. ### Bug Fixes - NVBug 1965743: Remove unnecessary static qualifiers. - NVBug 1940974: Fix regression causing a compilation error when using `thrust::merge_by_key` with `thrust::constant_iterator`s. - NVBug 1904217: Allow callables that take non-const refs to be used with `thrust::reduce` and `thrust::*_scan`. ## Thrust 1.9.0-5 (CUDA Toolkit 9.0) Thrust 1.9.0-5 replaces the original CUDA backend (bulk) with a new one written using CUB, a high performance CUDA collectives library. This brings a substantial performance improvement to the CUDA backend across the board. ### Breaking Changes - Any code depending on CUDA backend implementation details will likely be broken.
### New Features - New CUDA backend based on CUB which delivers substantially higher performance. - `thrust::transform_output_iterator`, a fancy iterator that applies a function to the output before storing the result. ### New Examples - `transform_output_iterator` demonstrates use of the new fancy iterator `thrust::transform_output_iterator`. ### Other Enhancements - When C++11 is enabled, functors do not have to inherit from `thrust::(unary|binary)_function` anymore to be used with `thrust::transform_iterator`. - Added C++11 only move constructors and move assignment operators for `thrust::detail::vector_base`-based classes, e.g. `thrust::host_vector`, `thrust::device_vector`, and friends. ### Bug Fixes - `sin(thrust::complex)` no longer has precision loss to float. ### Acknowledgments - Thanks to Manuel Schiller for contributing a C++11 based enhancement regarding the deduction of functor return types, improving the performance of `thrust::unique` and implementing `thrust::transform_output_iterator`. - Thanks to Thibault Notargiacomo for the implementation of move semantics for the `thrust::vector_base`-based classes. - Thanks to Duane Merrill for developing CUB and helping to integrate it into Thrust's backend. ## Thrust 1.8.3 (CUDA Toolkit 8.0) Thrust 1.8.3 is a small bug fix release. ### New Examples - `range_view` demonstrates the use of a view (a non-owning wrapper for an iterator range with a container-like interface). ### Bug Fixes - `thrust::(min|max|minmax)_element` can now accept raw device pointers when an explicit device execution policy is used. - `thrust::clear` operations on vector types no longer requires the element type to have a default constructor. ## Thrust 1.8.2 (CUDA Toolkit 7.5) Thrust 1.8.2 is a small bug fix release. ### Bug Fixes - Avoid warnings and errors concerning user functions called from `__host__ __device__` functions. - #632: Fix an error in `thrust::set_intersection_by_key` with the CUDA backend. 
- #651: `thrust::copy` between host and device now accepts execution policies with streams attached, i.e. `thrust::cuda::par.on(stream)`. - #664: `thrust::for_each` and algorithms based on it no longer ignore streams attached to execution policies. ### Known Issues - #628: `thrust::reduce_by_key` for the CUDA backend fails for Compute Capability 5.0 devices. ## Thrust 1.8.1 (CUDA Toolkit 7.0) Thrust 1.8.1 is a small bug fix release. ### Bug Fixes - #615, #620: Fixed `thrust::for_each` and `thrust::reduce` to no longer fail on large inputs. ### Known Issues - #628: `thrust::reduce_by_key` for the CUDA backend fails for Compute Capability 5.0 devices. ## Thrust 1.8.0 Thrust 1.8.0 introduces support for algorithm invocation from CUDA device code, support for CUDA streams, and algorithm performance improvements. Users may now invoke Thrust algorithms from CUDA device code, providing a parallel algorithms library to CUDA programmers authoring custom kernels, as well as allowing Thrust programmers to nest their algorithm calls within functors. The `thrust::seq` execution policy allows users to require sequential algorithm execution in the calling thread and makes a sequential algorithms library available to individual CUDA threads. The `.on(stream)` syntax allows users to request a CUDA stream for kernels launched during algorithm execution. Finally, new CUDA algorithm implementations provide substantial performance improvements. ### New Features - Algorithms in CUDA Device Code: - Thrust algorithms may now be invoked from CUDA `__device__` and `__host__ __device__` functions. Algorithms invoked in this manner must be invoked with an execution policy as the first parameter. The following execution policies are supported in CUDA `__device__` code: - `thrust::seq` - `thrust::cuda::par` - `thrust::device`, when `THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA`. - Device-side algorithm execution may not be parallelized unless CUDA Dynamic Parallelism is available.
- Execution Policies: - CUDA Streams - The `thrust::cuda::par.on(stream)` syntax allows users to request that CUDA kernels launched during algorithm execution should occur on a given stream. - Algorithms executed with a CUDA stream in this manner may still synchronize with other streams when allocating temporary storage or returning results to the CPU. - `thrust::seq`, which allows users to require that an algorithm execute sequentially in the calling thread. - `thrust::complex`, a complex number data type. ### New Examples - simple_cuda_streams demonstrates how to request a CUDA stream during algorithm execution. - async_reduce demonstrates ways to achieve algorithm invocations which are asynchronous with the calling thread. ### Other Enhancements - CUDA sort performance for user-defined types is 300% faster on Tesla K20c for large problem sizes. - CUDA merge performance is 200% faster on Tesla K20c for large problem sizes. - CUDA sort performance for primitive types is 50% faster on Tesla K20c for large problem sizes. - CUDA reduce_by_key performance is 25% faster on Tesla K20c for large problem sizes. - CUDA scan performance is 15% faster on Tesla K20c for large problem sizes. - fallback_allocator example is simpler. ### Bug Fixes - #364: Iterators with unrelated system tags may be used with algorithms invoked with an execution policy - #371: Do not redefine `__CUDA_ARCH__`. - #379: Fix crash when dereferencing transform_iterator on the host. - #391: Avoid use of uppercase variable names. - #392: Fix `thrust::copy` between `cusp::complex` and `std::complex`. - #396: Program compiled with gcc < 4.3 hangs during comparison sort. - #406: `fallback_allocator.cu` example checks device for unified addressing support. - #417: Avoid using `std::less` in binary search algorithms. - #418: Avoid various warnings. - #443: Including version.h no longer configures default systems. - #578: NVCC produces warnings when sequential algorithms are used with CPU systems. 
### Known Issues - When invoked with primitive data types, thrust::sort, thrust::sort_by_key, thrust::stable_sort, & thrust::stable_sort_by_key may sometimes fail to link when compiling with `-rdc=true` with NVCC. - The CUDA implementation of thrust::reduce_by_key incorrectly outputs the last element in a segment of equivalent keys instead of the first. ### Acknowledgments - Thanks to Sean Baxter for contributing faster CUDA reduce, merge, and scan implementations. - Thanks to Duane Merrill for contributing a faster CUDA radix sort implementation. - Thanks to Filipe Maia for contributing the implementation of thrust::complex. ## Thrust 1.7.2 (CUDA Toolkit 6.5) Thrust 1.7.2 is a minor bug fix release. ### Bug Fixes - Avoid use of `std::min` in generic find implementation. ## Thrust 1.7.1 (CUDA Toolkit 6.0) Thrust 1.7.1 is a minor bug fix release. ### Bug Fixes - Eliminate identifiers in `set_operations.cu` example with leading underscore. - Eliminate unused variable warning in CUDA `reduce_by_key` implementation. - Avoid deriving function objects from `std::unary_function` and `std::binary_function`. ## Thrust 1.7.0 (CUDA Toolkit 5.5) Thrust 1.7.0 introduces a new interface for controlling algorithm execution as well as several new algorithms and performance improvements. With this new interface, users may directly control how algorithms execute as well as details such as the allocation of temporary storage. Key/value versions of thrust::merge and the set operation algorithms have been added, as well as stencil versions of partitioning algorithms. thrust::tabulate has been introduced to tabulate the values of functions taking integers. For 32b types, new CUDA merge and set operations provide 2-15x faster performance while a new CUDA comparison sort provides 1.3-4x faster performance. Finally, a new TBB reduce_by_key implementation provides 80% faster performance.
### Breaking Changes - Dispatch: - Custom user backend systems' tag types must now inherit from the corresponding system's execution_policy template (e.g. thrust::cuda::execution_policy) instead of the tag struct (e.g. thrust::cuda::tag). Otherwise, algorithm specializations will silently go unfound during dispatch. See examples/minimal_custom_backend.cu and examples/cuda/fallback_allocator.cu for usage examples. - thrust::advance and thrust::distance are no longer dispatched based on iterator system type and thus may no longer be customized. - Iterators: - iterator_facade and iterator_adaptor's Pointer template parameters have been eliminated. - iterator_adaptor has been moved into the thrust namespace (previously thrust::experimental::iterator_adaptor). - iterator_facade has been moved into the thrust namespace (previously thrust::experimental::iterator_facade). - iterator_core_access has been moved into the thrust namespace (previously thrust::experimental::iterator_core_access). - All iterators' nested pointer typedef (the type of the result of operator->) is now void instead of a pointer type to indicate that such expressions are currently impossible. - Floating point counting_iterators' nested difference_type typedef is now a signed integral type instead of a floating point type. - Other: - normal_distribution has been moved into the thrust::random namespace (previously thrust::random::experimental::normal_distribution). - Placeholder expressions may no longer include the comma operator. ### New Features - Execution Policies: - Users may directly control the dispatch of algorithm invocations with optional execution policy arguments. For example, instead of wrapping raw pointers allocated by cudaMalloc with thrust::device_ptr, the thrust::device execution_policy may be passed as an argument to an algorithm invocation to enable CUDA execution. 
- The following execution policies are supported in this version: - `thrust::host` - `thrust::device` - `thrust::cpp::par` - `thrust::cuda::par` - `thrust::omp::par` - `thrust::tbb::par` - Algorithms: - `thrust::merge_by_key` - `thrust::partition` with stencil - `thrust::partition_copy` with stencil - `thrust::set_difference_by_key` - `thrust::set_intersection_by_key` - `thrust::set_symmetric_difference_by_key` - `thrust::set_union_by_key` - `thrust::stable_partition` with stencil - `thrust::stable_partition_copy` with stencil - `thrust::tabulate` - Memory Allocation: - `thrust::malloc` - `thrust::free` - `thrust::get_temporary_buffer` - `thrust::return_temporary_buffer` ### New Examples - uninitialized_vector demonstrates how to use a custom allocator to avoid the automatic initialization of elements in thrust::device_vector. ### Other Enhancements - Authors of custom backend systems may manipulate arbitrary state during algorithm dispatch by incorporating it into their execution_policy parameter. - Users may control the allocation of temporary storage during algorithm execution by passing standard allocators as parameters via execution policies such as thrust::device. - THRUST_DEVICE_SYSTEM_CPP has been added as a compile-time target for the device backend. - CUDA merge performance is 2-15x faster. - CUDA comparison sort performance is 1.3-4x faster. - CUDA set operation performance is 1.5-15x faster. - TBB reduce_by_key performance is 80% faster. - Several algorithms have been parallelized with TBB. - Support for user allocators in vectors has been improved. - The sparse_vector example is now implemented with merge_by_key instead of sort_by_key. - Warnings have been eliminated in various contexts. - Warnings about `__host__` or `__device__`-only functions called from `__host__ __device__` functions have been eliminated in various contexts. - Documentation about algorithm requirements has been improved. - Simplified the minimal_custom_backend example.
- Simplified the cuda/custom_temporary_allocation example. - Simplified the cuda/fallback_allocator example. ### Bug Fixes - #248: Fix broken `thrust::counting_iterator` behavior with OpenMP. - #231, #209: Fix set operation failures with CUDA. - #187: Fix incorrect occupancy calculation with CUDA. - #153: Fix broken multi GPU behavior with CUDA. - #142: Eliminate warning produced by `thrust::random::taus88` and MSVC 2010. - #208: Correctly initialize elements in temporary storage when necessary. - #16: Fix compilation error when sorting bool with CUDA. - #10: Fix ambiguous overloads of `thrust::reinterpret_tag`. ### Known Issues - GCC 4.3 and lower may fail to dispatch thrust::get_temporary_buffer correctly causing infinite recursion in examples such as cuda/custom_temporary_allocation. ### Acknowledgments - Thanks to Sean Baxter, Bryan Catanzaro, and Manjunath Kudlur for contributing a faster merge implementation for CUDA. - Thanks to Sean Baxter for contributing a faster set operation implementation for CUDA. - Thanks to Cliff Woolley for contributing a correct occupancy calculation algorithm. ## Thrust 1.6.0 Thrust 1.6.0 provides an interface for customization and extension and a new backend system based on the Threading Building Blocks library. With this new interface, programmers may customize the behavior of specific algorithms as well as control the allocation of temporary storage or invent entirely new backends. These enhancements also allow multiple different backend systems such as CUDA and OpenMP to coexist within a single program. Support for TBB allows Thrust programs to integrate more naturally into applications which may already employ the TBB task scheduler. 
### Breaking Changes - The header `<thrust/experimental/cuda/pinned_allocator.h>` has been moved to `<thrust/system/cuda/experimental/pinned_allocator.h>` - thrust::experimental::cuda::pinned_allocator has been moved to thrust::cuda::experimental::pinned_allocator - The macro THRUST_DEVICE_BACKEND has been renamed THRUST_DEVICE_SYSTEM - The macro THRUST_DEVICE_BACKEND_CUDA has been renamed THRUST_DEVICE_SYSTEM_CUDA - The macro THRUST_DEVICE_BACKEND_OMP has been renamed THRUST_DEVICE_SYSTEM_OMP - thrust::host_space_tag has been renamed thrust::host_system_tag - thrust::device_space_tag has been renamed thrust::device_system_tag - thrust::any_space_tag has been renamed thrust::any_system_tag - thrust::iterator_space has been renamed thrust::iterator_system ### New Features - Backend Systems - Threading Building Blocks (TBB) is now supported - Algorithms - `thrust::for_each_n` - `thrust::raw_reference_cast` - Types - `thrust::pointer` - `thrust::reference` ### New Examples - `cuda/custom_temporary_allocation` - `cuda/fallback_allocator` - `device_ptr` - `expand` - `minimal_custom_backend` - `raw_reference_cast` - `set_operations` ### Other Enhancements - `thrust::for_each` now returns the end of the input range similar to most other algorithms. - `thrust::pair` and `thrust::tuple` have swap functionality. - All CUDA algorithms now support large data types. - Iterators may be dereferenced in user `__device__` or `__global__` functions. - The safe use of different backend systems is now possible within a single binary ### Bug Fixes - #469 `min_element` and `max_element` algorithms no longer require a const comparison operator ### Known Issues - NVCC may crash when parsing TBB headers on Windows. ## Thrust 1.5.3 (CUDA Toolkit 5.0) Thrust 1.5.3 is a minor bug fix release. ### Bug Fixes - Avoid warnings about potential race due to `__shared__` non-POD variable ## Thrust 1.5.2 (CUDA Toolkit 4.2) Thrust 1.5.2 is a minor bug fix release.
### Bug Fixes - Fixed warning about C-style initialization of structures ## Thrust 1.5.1 (CUDA Toolkit 4.1) Thrust 1.5.1 is a minor bug fix release. ### Bug Fixes - Sorting data referenced by permutation_iterators on CUDA produces invalid results ## Thrust 1.5.0 Thrust 1.5.0 introduces new programmer productivity and performance enhancements. New functionality for creating anonymous "lambda" functions has been added. A faster host sort provides 2-10x faster performance for sorting arithmetic types on (single-threaded) CPUs. A new OpenMP sort provides 2.5x-3.0x speedup over the host sort using a quad-core CPU. When sorting arithmetic types with the OpenMP backend the combined performance improvement is 5.9x for 32-bit integers and ranges from 3.0x (64-bit types) to 14.2x (8-bit types). A new CUDA `reduce_by_key` implementation provides 2-3x faster performance. ### Breaking Changes - `device_ptr<void>` no longer unsafely converts to `device_ptr<T>` without an explicit cast. Use the expression `device_pointer_cast(static_cast<int*>(void_ptr.get()))` to convert, for example, `device_ptr<void>` to `device_ptr<int>`. ### New Features - Algorithms: - Stencil-less `thrust::transform_if`. - Lambda placeholders ### New Examples - lambda ### Other Enhancements - Host sort is 2-10x faster for arithmetic types - OMP sort provides speedup over host sort - `reduce_by_key` is 2-3x faster - `reduce_by_key` no longer requires O(N) temporary storage - CUDA scan algorithms are 10-40% faster - `host_vector` and `device_vector` are now documented - out-of-memory exceptions now provide detailed information from CUDART - improved histogram example - `device_reference` now has a specialized swap - `reduce_by_key` and scan algorithms are compatible with `discard_iterator` ### Bug Fixes - #44: Allow `thrust::host_vector` to compile when `value_type` uses `__align__`. - #198: Allow `thrust::adjacent_difference` to permit safe in-situ operation. - #303: Make thrust thread-safe.
- #313: Avoid race conditions in `thrust::device_vector::insert`. - #314: Avoid unintended ADL invocation when dispatching copy. - #365: Fix merge and set operation failures. ### Known Issues - None ### Acknowledgments - Thanks to Manjunath Kudlur for contributing his Carbon library, from which the lambda functionality is derived. - Thanks to Jean-Francois Bastien for suggesting a fix for #303. ## Thrust 1.4.0 (CUDA Toolkit 4.0) Thrust 1.4.0 is the first release of Thrust to be included in the CUDA Toolkit. Additionally, it brings many feature and performance improvements. New set theoretic algorithms operating on sorted sequences have been added. Additionally, a new fancy iterator allows discarding redundant or otherwise unnecessary output from algorithms, conserving memory storage and bandwidth. ### Breaking Changes - Eliminations - `thrust/is_sorted.h` - `thrust/utility.h` - `thrust/set_intersection.h` - `thrust/experimental/cuda/ogl_interop_allocator.h` and the functionality therein - `thrust::deprecated::copy_when` - `thrust::deprecated::absolute_value` - `thrust::gather` and `thrust::scatter` from host to device and vice versa are no longer supported. - Operations which modify the elements of a thrust::device_vector are no longer available from source code compiled without nvcc when the device backend is CUDA. Instead, use the idiom from the cpp_interop example. ### New Features - Algorithms: - `thrust::copy_n` - `thrust::merge` - `thrust::set_difference` - `thrust::set_symmetric_difference` - `thrust::set_union` - Types - `thrust::discard_iterator` - Device Support: - Compute Capability 2.1 GPUs. ### New Examples - run_length_decoding ### Other Enhancements - Compilation warnings are substantially reduced in various contexts.
- The compilation time of thrust::sort, thrust::stable_sort, thrust::sort_by_key, and thrust::stable_sort_by_key is substantially reduced. - A fast sort implementation is used when sorting primitive types with thrust::greater. - The performance of thrust::set_intersection is improved. - The performance of thrust::fill is improved on SM 1.x devices. - A code example is now provided in each algorithm's documentation. - thrust::reverse now operates in-place ### Bug Fixes - #212: `thrust::set_intersection` works correctly for large input sizes. - #275: `thrust::counting_iterator` and `thrust::constant_iterator` work correctly with OpenMP as the backend when compiling with optimization. - #256: `min` and `max` correctly return their first argument as a tie-breaker. - #248: `NDEBUG` is interpreted correctly. ### Known Issues - NVCC may generate code containing warnings when compiling some Thrust algorithms. - When compiling with `-arch=sm_1x`, some Thrust algorithms may cause NVCC to issue benign pointer advisories. - When compiling with `-arch=sm_1x` and `-G`, some Thrust algorithms may fail to execute correctly. - `thrust::inclusive_scan`, `thrust::exclusive_scan`, `thrust::inclusive_scan_by_key`, and `thrust::exclusive_scan_by_key` are currently incompatible with `thrust::discard_iterator`. ### Acknowledgments - Thanks to David Tarjan for improving the performance of set_intersection. - Thanks to Duane Merrill for continued help with sort. - Thanks to Nathan Whitehead for help with CUDA Toolkit integration. ## Thrust 1.3.0 Thrust 1.3.0 provides support for CUDA Toolkit 3.2 in addition to many feature and performance enhancements. Performance of the sort and sort_by_key algorithms is improved by as much as 3x in certain situations. The performance of stream compaction algorithms, such as copy_if, is improved by as much as 2x. CUDA errors are now converted to runtime exceptions using the system_error interface.
Combined with a debug mode, also new in 1.3, runtime errors can be located with greater precision. Lastly, a few header files have been consolidated or renamed for clarity. See the deprecations section below for additional details. ### Breaking Changes - Promotions - thrust::experimental::inclusive_segmented_scan has been renamed thrust::inclusive_scan_by_key and exposes a different interface - thrust::experimental::exclusive_segmented_scan has been renamed thrust::exclusive_scan_by_key and exposes a different interface - thrust::experimental::partition_copy has been renamed thrust::partition_copy and exposes a different interface - thrust::next::gather has been renamed thrust::gather - thrust::next::gather_if has been renamed thrust::gather_if - thrust::unique_copy_by_key has been renamed thrust::unique_by_key_copy - Deprecations - thrust::copy_when has been renamed thrust::deprecated::copy_when - thrust::absolute_value has been renamed thrust::deprecated::absolute_value - The header thrust/set_intersection.h is now deprecated; use thrust/set_operations.h instead - The header thrust/utility.h is now deprecated; use thrust/swap.h instead - The header thrust/swap_ranges.h is now deprecated; use thrust/swap.h instead - Eliminations - thrust::deprecated::gather - thrust::deprecated::gather_if - thrust/experimental/arch.h and the functions therein - thrust/sorting/merge_sort.h - thrust/sorting/radix_sort.h - NVCC 2.3 is no longer supported ### New Features - Algorithms: - `thrust::exclusive_scan_by_key` - `thrust::find` - `thrust::find_if` - `thrust::find_if_not` - `thrust::inclusive_scan_by_key` - `thrust::is_partitioned` - `thrust::is_sorted_until` - `thrust::mismatch` - `thrust::partition_point` - `thrust::reverse` - `thrust::reverse_copy` - `thrust::stable_partition_copy` - Types: - `thrust::system_error` and related types. - `thrust::experimental::cuda::ogl_interop_allocator`. - `thrust::bit_and`, `thrust::bit_or`, and `thrust::bit_xor`. 
- Device Support: - GF104-based GPUs. ### New Examples - opengl_interop.cu - repeated_range.cu - simple_moving_average.cu - sparse_vector.cu - strided_range.cu ### Other Enhancements - Performance of thrust::sort and thrust::sort_by_key is substantially improved for primitive key types - Performance of thrust::copy_if is substantially improved - Performance of thrust::reduce and related reductions is improved - THRUST_DEBUG mode added - Callers of Thrust functions may detect error conditions by catching thrust::system_error, which derives from std::runtime_error - The number of compiler warnings generated by Thrust has been substantially reduced - Comparison sort now works correctly for input sizes > 32M - min & max usage no longer collides with `<windows.h>` definitions - Compiling against the OpenMP backend no longer requires nvcc - Performance of device_vector initialized in .cpp files is substantially improved in common cases - Performance of thrust::sort_by_key on the host is substantially improved ### Bug Fixes - Debug device code now compiles correctly - thrust::uninitialized_copy and thrust::uninitialized_fill now dispatch constructors on the device rather than the host ### Known Issues - #212 set_intersection is known to fail for large input sizes - partition_point is known to fail for 64b types with nvcc 3.2 ### Acknowledgments - Thanks to Duane Merrill for contributing a fast CUDA radix sort implementation - Thanks to Erich Elsen for contributing an implementation of find_if - Thanks to Andrew Corrigan for contributing changes which allow the OpenMP backend to compile in the absence of nvcc - Thanks to Andrew Corrigan, Cliff Woolley, David Coeurjolly, Janick Martinez Esturo, John Bowers, Maxim Naumov, Michael Garland, and Ryuta Suzuki for bug reports - Thanks to Cliff Woolley for help with testing ## Thrust 1.2.1 Thrust 1.2.1 is a small bug fix release that is compatible with the CUDA Toolkit 3.1 release.
### Known Issues - `thrust::inclusive_scan` and `thrust::exclusive_scan` may fail with very large types. - MSVC may fail to compile code using both sort and binary search algorithms. - `thrust::uninitialized_fill` and `thrust::uninitialized_copy` dispatch constructors on the host rather than the device. - #109: Some algorithms may exhibit poor performance with the OpenMP backend with large numbers (>= 6) of CPU threads. - `thrust::default_random_engine::discard` is not accelerated with NVCC 2.3 - NVCC 3.1 may fail to compile code using types derived from `thrust::subtract_with_carry_engine`, such as `thrust::ranlux24` and `thrust::ranlux48`. ## Thrust 1.2.0 Thrust 1.2.0 introduces support for compilation to multicore CPUs and the Ocelot virtual machine, and several new facilities for pseudo-random number generation. New algorithms such as set intersection and segmented reduction have also been added. Lastly, improvements to the robustness of the CUDA backend ensure correctness across a broad set of (uncommon) use cases. ### Breaking Changes - `thrust::gather`'s interface was incorrect and has been removed. The old interface is deprecated but will be preserved for Thrust version 1.2 at `thrust::deprecated::gather` and `thrust::deprecated::gather_if`. The new interface is provided at `thrust::next::gather` and `thrust::next::gather_if`. The new interface will be promoted to `thrust::` in Thrust version 1.3. For more details, please refer to [this thread](http://groups.google.com/group/thrust-users/browse_thread/thread/f5f0583cb97b51fd). - The `thrust::sorting` namespace has been deprecated in favor of the top-level sorting functions, such as `thrust::sort` and `thrust::sort_by_key`. - Removed support for `thrust::equal` between host & device sequences. - Removed support for `thrust::scatter` between host & device sequences. 
### New Features - Algorithms: - `thrust::reduce_by_key` - `thrust::set_intersection` - `thrust::unique_copy` - `thrust::unique_by_key` - `thrust::unique_copy_by_key` - Types - Random Number Generation: - `thrust::discard_block_engine` - `thrust::default_random_engine` - `thrust::linear_congruential_engine` - `thrust::linear_feedback_shift_engine` - `thrust::subtract_with_carry_engine` - `thrust::xor_combine_engine` - `thrust::minstd_rand` - `thrust::minstd_rand0` - `thrust::ranlux24` - `thrust::ranlux48` - `thrust::ranlux24_base` - `thrust::ranlux48_base` - `thrust::taus88` - `thrust::uniform_int_distribution` - `thrust::uniform_real_distribution` - `thrust::normal_distribution` (experimental) - Function Objects: - `thrust::project1st` - `thrust::project2nd` - `thrust::tie` - Fancy Iterators: - `thrust::permutation_iterator` - `thrust::reverse_iterator` - Vector Functions: - `operator!=` - `rbegin` - `crbegin` - `rend` - `crend` - `data` - `shrink_to_fit` - Device Support: - Multicore CPUs via OpenMP. - Fermi-class GPUs. - Ocelot virtual machines. - Support for NVCC 3.0. ### New Examples - `cpp_integration` - `histogram` - `mode` - `monte_carlo` - `monte_carlo_disjoint_sequences` - `padded_grid_reduction` - `permutation_iterator` - `row_sum` - `run_length_encoding` - `segmented_scan` - `stream_compaction` - `summary_statistics` - `transform_iterator` - `word_count` ### Other Enhancements - Integer sorting performance is improved when max is large but (max - min) is small and when min is negative - Performance of `thrust::inclusive_scan` and `thrust::exclusive_scan` is improved by 20-25% for primitive types. 
### Bug Fixes - #8 cause a compiler error if the required compiler is not found rather than a mysterious error at link time - #42 device_ptr & device_reference are classes rather than structs, eliminating warnings on certain platforms - #46 gather & scatter handle any space iterators correctly - #51 thrust::experimental::arch functions gracefully handle unrecognized GPUs - #52 avoid collisions with common user macros such as BLOCK_SIZE - #62 provide better documentation for device_reference - #68 allow built-in CUDA vector types to work with device_vector in pure C++ mode - #102 eliminated a race condition in device_vector::erase - various compilation warnings eliminated ### Known Issues - inclusive_scan & exclusive_scan may fail with very large types - MSVC may fail to compile code using both sort and binary search algorithms - uninitialized_fill & uninitialized_copy dispatch constructors on the host rather than the device - #109 some algorithms may exhibit poor performance with the OpenMP backend with large numbers (>= 6) of CPU threads - default_random_engine::discard is not accelerated with nvcc 2.3 ### Acknowledgments - Thanks to Gregory Diamos for contributing a CUDA implementation of set_intersection - Thanks to Ryuta Suzuki & Gregory Diamos for rigorously testing Thrust's unit tests and examples against Ocelot - Thanks to Tom Bradley for contributing an implementation of normal_distribution - Thanks to Joseph Rhoads for contributing the example summary_statistics ## Thrust 1.1.1 Thrust 1.1.1 is a small bug fix release that is compatible with the CUDA Toolkit 2.3a release and Mac OSX Snow Leopard. ## Thrust 1.1.0 Thrust 1.1.0 introduces fancy iterators, binary search functions, and several specialized reduction functions. Experimental support for segmented scans has also been added. ### Breaking Changes - `thrust::counting_iterator` has been moved into the `thrust` namespace (previously `thrust::experimental`). 
### New Features - Algorithms: - `thrust::copy_if` - `thrust::lower_bound` - `thrust::upper_bound` - Vectorized `thrust::lower_bound` - Vectorized `thrust::upper_bound` - `thrust::equal_range` - `thrust::binary_search` - Vectorized `thrust::binary_search` - `thrust::all_of` - `thrust::any_of` - `thrust::none_of` - `thrust::minmax_element` - `thrust::advance` - `thrust::inclusive_segmented_scan` (experimental) - `thrust::exclusive_segmented_scan` (experimental) - Types: - `thrust::pair` - `thrust::tuple` - `thrust::device_malloc_allocator` - Fancy Iterators: - `thrust::constant_iterator` - `thrust::counting_iterator` - `thrust::transform_iterator` - `thrust::zip_iterator` ### New Examples - Computing the maximum absolute difference between vectors. - Computing the bounding box of a two-dimensional point set. - Sorting multiple arrays together (lexicographical sorting). - Constructing a summed area table. - Using `thrust::zip_iterator` to mimic an array of structs. - Using `thrust::constant_iterator` to increment array values. ### Other Enhancements - Added pinned memory allocator (experimental). - Added more methods to host_vector & device_vector (issue #4). - Added variant of remove_if with a stencil argument (issue #29). - Scan and reduce use cudaFuncGetAttributes to determine grid size. - Exceptions are reported when temporary device arrays cannot be allocated. ### Bug Fixes - #5: Make vector work for larger data types - #9: stable_partition_copy doesn't respect OutputIterator concept semantics - #10: scans should return OutputIterator - #16: make algorithms work for larger data types - #27: Dispatch radix_sort even when comp=less is explicitly provided ### Known Issues - Using functors with Thrust entry points may not compile on Mac OSX with gcc 4.0.1. - `thrust::uninitialized_copy` and `thrust::uninitialized_fill` dispatch constructors on the host rather than the device.
- `thrust::inclusive_scan`, `thrust::inclusive_scan_by_key`, `thrust::exclusive_scan`, and `thrust::exclusive_scan_by_key` may fail when used with large types with the CUDA Toolkit 3.1. ## Thrust 1.0.0 First production release of Thrust. ### Breaking Changes - Rename top level namespace `komrade` to `thrust`. - Move `thrust::partition_copy` & `thrust::stable_partition_copy` into `thrust::experimental` namespace until we can easily provide the standard interface. - Rename `thrust::range` to `thrust::sequence` to avoid collision with Boost.Range. - Rename `thrust::copy_if` to `thrust::copy_when` due to semantic differences with C++0x `std::copy_if`. ### New Features - Add C++0x style `cbegin` & `cend` methods to `thrust::host_vector` and `thrust::device_vector`. - Add `thrust::transform_if` function. - Add stencil versions of `thrust::replace_if` & `thrust::replace_copy_if`. - Allow `counting_iterator` to work with `thrust::for_each`. - Allow types with constructors in comparison `thrust::sort` and `thrust::reduce`. ### Other Enhancements - `thrust::merge_sort` and `thrust::stable_merge_sort` are now 2x to 5x faster when executed on the parallel device. ### Bug Fixes - Komrade 6: Workaround an issue where an incremented iterator causes NVCC to crash. - Komrade 7: Fix an issue where `const_iterator`s could not be passed to `thrust::transform`. rocThrust-rocm-5.7.1/docs/000077500000000000000000000000001450263404500154125ustar00rootroot00000000000000rocThrust-rocm-5.7.1/docs/.doxygen/000077500000000000000000000000001450263404500171455ustar00rootroot00000000000000rocThrust-rocm-5.7.1/docs/.doxygen/Doxyfile000066400000000000000000003166071450263404500206700ustar00rootroot00000000000000# Doxyfile 1.8.10 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project. # # All text after a double hash (##) is considered a comment and is placed in # front of the TAG it is preceding. 
# # All text after a single hash (#) is considered a comment and will be ignored. # The format is: # TAG = value [value, ...] # For lists, items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (\" \"). #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- # This tag specifies the encoding used for all characters in the config file # that follow. The default is UTF-8 which is also the encoding used for all text # before the first occurrence of this tag. Doxygen uses libiconv (or the iconv # built into libc) for the transcoding. See http://www.gnu.org/software/libiconv # for the list of possible encodings. # The default value is: UTF-8. DOXYFILE_ENCODING = UTF-8 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded by # double-quotes, unless you are using Doxywizard) that should identify the # project for which the documentation is generated. This name is used in the # title of most generated pages and in a few other places. # The default value is: My Project. PROJECT_NAME = rocThrust # The PROJECT_NUMBER tag can be used to enter a project or revision number. This # could be handy for archiving the generated documentation or if some version # control system is used. PROJECT_NUMBER = v3.0.1.0 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a # quick idea about the purpose of the project. Keep the description short. PROJECT_BRIEF = "prototype interfaces compatible with ROCm platform and HiP" # With the PROJECT_LOGO tag one can specify a logo or an icon that is included # in the documentation. The maximum height of the logo should not exceed 55 # pixels and the maximum width should not exceed 200 pixels. 
Doxygen will copy # the logo to the output directory. PROJECT_LOGO = # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path # into which the generated documentation will be written. If a relative path is # entered, it will be relative to the location where doxygen was started. If # left blank the current directory will be used. OUTPUT_DIRECTORY = docBin # If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- # directories (in 2 levels) under the output directory of each output format and # will distribute the generated files over these directories. Enabling this # option can be useful when feeding doxygen a huge amount of source files, where # putting all generated files in the same directory would otherwise cause # performance problems for the file system. # The default value is: NO. CREATE_SUBDIRS = NO # If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII # characters to appear in the names of generated files. If set to NO, non-ASCII # characters will be escaped, for example _xE3_x81_x84 will be used for Unicode # U+3044. # The default value is: NO. ALLOW_UNICODE_NAMES = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. # Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, # Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), # Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, # Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), # Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, # Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, # Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, # Ukrainian and Vietnamese. # The default value is: English.
OUTPUT_LANGUAGE = English # If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member # descriptions after the members that are listed in the file and class # documentation (similar to Javadoc). Set to NO to disable this. # The default value is: YES. BRIEF_MEMBER_DESC = YES # If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief # description of a member or function before the detailed description # # Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the # brief descriptions will be completely suppressed. # The default value is: YES. REPEAT_BRIEF = YES # This tag implements a quasi-intelligent brief description abbreviator that is # used to form the text in various listings. Each string in this list, if found # as the leading text of the brief description, will be stripped from the text # and the result, after processing the whole list, is used as the annotated # text. Otherwise, the brief description is used as-is. If left blank, the # following values are used ($name is automatically replaced with the name of # the entity):The $name class, The $name widget, The $name file, is, provides, # specifies, contains, represents, a, an and the. ABBREVIATE_BRIEF = "The $name class" \ "The $name widget" \ "The $name file" \ is \ provides \ specifies \ contains \ represents \ a \ an \ the # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then # doxygen will generate a detailed section even if there is only a brief # description. # The default value is: NO. ALWAYS_DETAILED_SEC = NO # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all # inherited members of a class in the documentation of that class as if those # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. # The default value is: NO. 
INLINE_INHERITED_MEMB = NO # If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path # before files name in the file list and in the header files. If set to NO the # shortest path that makes the file name unique will be used # The default value is: YES. FULL_PATH_NAMES = YES # The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. # Stripping is only done if one of the specified strings matches the left-hand # part of the path. The tag can be used to show relative paths in the file list. # If left blank the directory from which doxygen is run is used as the path to # strip. # # Note that you can specify absolute paths here, but also relative paths, which # will be relative from the directory where doxygen is started. # This tag requires that the tag FULL_PATH_NAMES is set to YES. STRIP_FROM_PATH = .. # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the # path mentioned in the documentation of a class, which tells the reader which # header file to include in order to use a class. If left blank only the name of # the header file containing the class definition is used. Otherwise one should # specify the list of include paths that are normally passed to the compiler # using the -I flag. STRIP_FROM_INC_PATH = # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but # less readable) file names. This can be useful if your file system doesn't # support long names like on DOS, Mac, or CD-ROM. # The default value is: NO. SHORT_NAMES = NO # If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the # first line (until the first dot) of a Javadoc-style comment as the brief # description. If set to NO, the Javadoc-style will behave just like regular Qt- # style comments (thus requiring an explicit @brief command for a brief # description.) # The default value is: NO.
JAVADOC_AUTOBRIEF = NO # If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first # line (until the first dot) of a Qt-style comment as the brief description. If # set to NO, the Qt-style will behave just like regular Qt-style comments (thus # requiring an explicit \brief command for a brief description.) # The default value is: NO. QT_AUTOBRIEF = NO # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a # multi-line C++ special comment block (i.e. a block of //! or /// comments) as # a brief description. This used to be the default behavior. The new default is # to treat a multi-line C++ comment block as a detailed description. Set this # tag to YES if you prefer the old behavior instead. # # Note that setting this tag to YES also means that rational rose comments are # not recognized any more. # The default value is: NO. MULTILINE_CPP_IS_BRIEF = NO # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the # documentation from any documented member that it re-implements. # The default value is: YES. INHERIT_DOCS = YES # If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new # page for each member. If set to NO, the documentation of a member will be part # of the file/class/namespace that contains it. # The default value is: NO. SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen # uses this value to replace tabs by spaces in code fragments. # Minimum value: 1, maximum value: 16, default value: 4. TAB_SIZE = 4 # This tag can be used to specify a number of aliases that act as commands in # the documentation. An alias has the form: # name=value # For example adding # "sideeffect=@par Side Effects:\n" # will allow you to put the command \sideeffect (or @sideeffect) in the # documentation, which will result in a user-defined paragraph with heading # "Side Effects:". You can put \n's in the value part of an alias to insert # newlines. 
ALIASES = # This tag can be used to specify a number of word-keyword mappings (TCL only). # A mapping has the form "name=value". For example adding "class=itcl::class" # will allow you to use the command class in the itcl::class meaning. TCL_SUBST = # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources # only. Doxygen will then generate output that is more tailored for C. For # instance, some of the names that are used will be different. The list of all # members will be omitted, etc. # The default value is: NO. OPTIMIZE_OUTPUT_FOR_C = NO # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or # Python sources only. Doxygen will then generate output that is more tailored # for that language. For instance, namespaces will be presented as packages, # qualified scopes will look different, etc. # The default value is: NO. OPTIMIZE_OUTPUT_JAVA = NO # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran # sources. Doxygen will then generate output that is tailored for Fortran. # The default value is: NO. OPTIMIZE_FOR_FORTRAN = NO # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL # sources. Doxygen will then generate output that is tailored for VHDL. # The default value is: NO. OPTIMIZE_OUTPUT_VHDL = NO # Doxygen selects the parser to use depending on the extension of the files it # parses. With this tag you can assign which parser to use for a given # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. The format is ext=language, where ext is a file extension, and # language is one of the parsers supported by doxygen: IDL, Java, Javascript, # C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran: # FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran: # Fortran. 
In the latter case the parser tries to guess whether the code is fixed # or free formatted code, this is the default for Fortran type files), VHDL. For # instance to make doxygen treat .inc files as Fortran files (default is PHP), # and .f files as C (default is Fortran), use: inc=Fortran f=C. # # Note: For files without extension you can use no_extension as a placeholder. # # Note that for custom extensions you also need to set FILE_PATTERNS otherwise # the files are not read by doxygen. EXTENSION_MAPPING = # If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments # according to the Markdown format, which allows for more readable # documentation. See http://daringfireball.net/projects/markdown/ for details. # The output of markdown processing is further processed by doxygen, so you can # mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in # case of backward compatibility issues. # The default value is: YES. MARKDOWN_SUPPORT = YES # When enabled doxygen tries to link words that correspond to documented # classes, or namespaces to their corresponding documentation. Such a link can # be prevented in individual cases by putting a % sign in front of the word or # globally by setting AUTOLINK_SUPPORT to NO. # The default value is: YES. AUTOLINK_SUPPORT = YES # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want # to include (a tag file for) the STL sources as input, then you should set this # tag to YES in order to let doxygen match functions declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); # versus func(std::string) {}). This also makes the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. # The default value is: NO. BUILTIN_STL_SUPPORT = NO # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. # The default value is: NO.
CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip (see: # http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen # will parse them like normal C++ but will assume all classes use public instead # of private inheritance when no explicit protection keyword is present. # The default value is: NO. SIP_SUPPORT = NO # For Microsoft's IDL there are propget and propput attributes to indicate # getter and setter methods for a property. Setting this option to YES will make # doxygen replace the get and set methods by a property in the documentation. # This will only work if the methods are indeed getting or setting a simple # type. If this is not the case, or you want to show the methods anyway, you # should set this option to NO. # The default value is: YES. IDL_PROPERTY_SUPPORT = YES # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. # The default value is: NO. DISTRIBUTE_GROUP_DOC = YES # If one adds a struct or class to a group and this option is enabled, then also # any nested class or struct is added to the same group. By default this option # is disabled and one has to add nested compounds explicitly via \ingroup. # The default value is: NO. GROUP_NESTED_COMPOUNDS = NO # Set the SUBGROUPING tag to YES to allow class member groups of the same type # (for instance a group of public functions) to be put as a subgroup of that # type (e.g. under the Public Functions section). Set it to NO to prevent # subgrouping. Alternatively, this can be done per class using the # \nosubgrouping command. # The default value is: YES. SUBGROUPING = YES # When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions # are shown inside the group in which they are included (e.g.
using \ingroup) # instead of on a separate page (for HTML and Man pages) or section (for LaTeX # and RTF). # # Note that this feature does not work in combination with # SEPARATE_MEMBER_PAGES. # The default value is: NO. INLINE_GROUPED_CLASSES = NO # When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions # with only public data fields or simple typedef fields will be shown inline in # the documentation of the scope in which they are defined (i.e. file, # namespace, or group documentation), provided this scope is documented. If set # to NO, structs, classes, and unions are shown on a separate page (for HTML and # Man pages) or section (for LaTeX and RTF). # The default value is: NO. INLINE_SIMPLE_STRUCTS = NO # When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or # enum is documented as struct, union, or enum with the name of the typedef. So # typedef struct TypeS {} TypeT, will appear in the documentation as a struct # with name TypeT. When disabled the typedef will appear as a member of a file, # namespace, or class. And the struct will be named TypeS. This can typically be # useful for C code in case the coding convention dictates that all compound # types are typedef'ed and only the typedef is referenced, never the tag name. # The default value is: NO. TYPEDEF_HIDES_STRUCT = YES # The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This # cache is used to resolve symbols given their name and scope. Since this can be # an expensive process and often the same symbol appears multiple times in the # code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small # doxygen will become slower. If the cache is too large, memory is wasted. The # cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range # is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 # symbols. 
At the end of a run doxygen will report the cache usage and suggest # the optimal cache size from a speed point of view. # Minimum value: 0, maximum value: 9, default value: 0. LOOKUP_CACHE_SIZE = 0 #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- SHOW_NAMESPACES = NO # If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in # documentation are documented, even if no documentation was available. Private # class members and static file members will be hidden unless the # EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. # Note: This will also disable the warnings about undocumented members that are # normally produced when WARNINGS is set to YES. # The default value is: NO. EXTRACT_ALL = NO # If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will # be included in the documentation. # The default value is: NO. EXTRACT_PRIVATE = NO # If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal # scope will be included in the documentation. # The default value is: NO. EXTRACT_PACKAGE = NO # If the EXTRACT_STATIC tag is set to YES, all static members of a file will be # included in the documentation. # The default value is: NO. EXTRACT_STATIC = NO # If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined # locally in source files will be included in the documentation. If set to NO, # only classes defined in header files are included. Does not have any effect # for Java sources. # The default value is: YES. EXTRACT_LOCAL_CLASSES = YES # This flag is only useful for Objective-C code. If set to YES, local methods, # which are defined in the implementation section but not in the interface are # included in the documentation. If set to NO, only methods in the interface are # included. # The default value is: NO. 
EXTRACT_LOCAL_METHODS = NO # If this flag is set to YES, the members of anonymous namespaces will be # extracted and appear in the documentation as a namespace called # 'anonymous_namespace{file}', where file will be replaced with the base name of # the file that contains the anonymous namespace. By default anonymous namespaces # are hidden. # The default value is: NO. EXTRACT_ANON_NSPACES = NO # If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all # undocumented members inside documented classes or files. If set to NO these # members will be included in the various overviews, but no documentation # section is generated. This option has no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. If set # to NO, these classes will be included in the various overviews. This option # has no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend # (class|struct|union) declarations. If set to NO, these declarations will be # included in the documentation. # The default value is: NO. HIDE_FRIEND_COMPOUNDS = NO # If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any # documentation blocks found inside the body of a function. If set to NO, these # blocks will be appended to the function's detailed documentation block. # The default value is: NO. HIDE_IN_BODY_DOCS = NO # The INTERNAL_DOCS tag determines if documentation that is typed after a # \internal command is included. If the tag is set to NO then the documentation # will be excluded. Set it to YES to include the internal documentation. # The default value is: NO. INTERNAL_DOCS = NO # If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file # names in lower-case letters.
If set to YES, upper-case letters are also # allowed. This is useful if you have classes or files whose names only differ # in case and if your file system supports case sensitive file names. Windows # and Mac users are advised to set this option to NO. # The default value is: system dependent. CASE_SENSE_NAMES = NO # If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with # their full class and namespace scopes in the documentation. If set to YES, the # scope will be hidden. # The default value is: NO. HIDE_SCOPE_NAMES = NO # If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will # append additional text to a page's title, such as Class Reference. If set to # YES the compound reference will be hidden. # The default value is: NO. HIDE_COMPOUND_REFERENCE= NO # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of # the files that are included by a file in the documentation of that file. # The default value is: YES. SHOW_INCLUDE_FILES = YES # If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each # grouped member an include statement to the documentation, telling the reader # which file to include in order to use the member. # The default value is: NO. SHOW_GROUPED_MEMB_INC = NO # If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include # files with double quotes in the documentation rather than with sharp brackets. # The default value is: NO. FORCE_LOCAL_INCLUDES = NO # If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the # documentation for inline members. # The default value is: YES. INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the # (detailed) documentation of file and class members alphabetically by member # name. If set to NO, the members will appear in declaration order. # The default value is: YES. 
SORT_MEMBER_DOCS = YES # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief # descriptions of file, namespace and class members alphabetically by member # name. If set to NO, the members will appear in declaration order. Note that # this will also influence the order of the classes in the class list. # The default value is: NO. SORT_BRIEF_DOCS = NO # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the # (brief and detailed) documentation of class members so that constructors and # destructors are listed first. If set to NO the constructors will appear in the # respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. # Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief # member documentation. # Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting # detailed member documentation. # The default value is: NO. SORT_MEMBERS_CTORS_1ST = NO # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy # of group names into alphabetical order. If set to NO the group names will # appear in their defined order. # The default value is: NO. SORT_GROUP_NAMES = NO # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by # fully-qualified names, including namespaces. If set to NO, the class list will # be sorted only by class name, not including the namespace part. # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. # Note: This option applies only to the class list, not to the alphabetical # list. # The default value is: NO. SORT_BY_SCOPE_NAME = NO # If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper # type resolution of all parameters of a function it will reject a match between # the prototype and the implementation of a member function even if there is # only one candidate or it is obvious which candidate to choose by doing a # simple string match. 
By disabling STRICT_PROTO_MATCHING doxygen will still # accept a match between prototype and implementation in such cases. # The default value is: NO. STRICT_PROTO_MATCHING = NO # The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo # list. This list is created by putting \todo commands in the documentation. # The default value is: YES. GENERATE_TODOLIST = YES # The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test # list. This list is created by putting \test commands in the documentation. # The default value is: YES. GENERATE_TESTLIST = YES # The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug # list. This list is created by putting \bug commands in the documentation. # The default value is: YES. GENERATE_BUGLIST = YES # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) # the deprecated list. This list is created by putting \deprecated commands in # the documentation. # The default value is: YES. GENERATE_DEPRECATEDLIST= YES # The ENABLED_SECTIONS tag can be used to enable conditional documentation # sections, marked by \if ... \endif and \cond # ... \endcond blocks. ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the # initial value of a variable or macro / define can have for it to appear in the # documentation. If the initializer consists of more lines than specified here # it will be hidden. Use a value of 0 to hide initializers completely. The # appearance of the value of individual variables and macros / defines can be # controlled using \showinitializer or \hideinitializer command in the # documentation regardless of this setting. # Minimum value: 0, maximum value: 10000, default value: 30. MAX_INITIALIZER_LINES = 30 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated at # the bottom of the documentation of classes and structs. 
If set to YES, the # list will mention the files that were used to generate the documentation. # The default value is: YES. SHOW_USED_FILES = YES # Set the SHOW_FILES tag to NO to disable the generation of the Files page. This # will remove the Files entry from the Quick Index and from the Folder Tree View # (if specified). # The default value is: YES. SHOW_FILES = YES # Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces # page. This will remove the Namespaces entry from the Quick Index and from the # Folder Tree View (if specified). # The default value is: YES. SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). Doxygen will invoke the program by executing (via # popen()) the command command input-file, where command is the value of the # FILE_VERSION_FILTER tag, and input-file is the name of an input file provided # by doxygen. Whatever the program writes to standard output is used as the file # version. For an example see the documentation. FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed # by doxygen. The layout file controls the global structure of the generated # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. You can # optionally specify a file name after the option, if omitted DoxygenLayout.xml # will be used as the name of the layout file. # # Note that if you run doxygen from a directory containing a file called # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE # tag is left empty. LAYOUT_FILE = # The CITE_BIB_FILES tag can be used to specify one or more bib files containing # the reference definitions. This must be a list of .bib files. 
The .bib # extension is automatically appended if omitted. This requires the bibtex tool # to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info. # For LaTeX the style of the bibliography can be controlled using # LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the # search path. See also \cite for info how to create references. CITE_BIB_FILES = #--------------------------------------------------------------------------- # Configuration options related to warning and progress messages #--------------------------------------------------------------------------- # The QUIET tag can be used to turn on/off the messages that are generated to # standard output by doxygen. If QUIET is set to YES this implies that the # messages are off. # The default value is: NO. QUIET = NO # The WARNINGS tag can be used to turn on/off the warning messages that are # generated to standard error (stderr) by doxygen. If WARNINGS is set to YES # this implies that the warnings are on. # # Tip: Turn warnings on while writing the documentation. # The default value is: YES. WARNINGS = YES # If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate # warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag # will automatically be disabled. # The default value is: YES. WARN_IF_UNDOCUMENTED = YES # If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as not documenting some parameters # in a documented function, or documenting parameters that don't exist or using # markup commands wrongly. # The default value is: YES. WARN_IF_DOC_ERROR = YES # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that # are documented, but have no documentation for their parameters or return # value. If set to NO, doxygen will only warn about wrong or incomplete # parameter documentation, but not about the absence of documentation. 
# The default value is: NO. WARN_NO_PARAMDOC = NO # The WARN_FORMAT tag determines the format of the warning messages that doxygen # can produce. The string should contain the $file, $line, and $text tags, which # will be replaced by the file and line number from which the warning originated # and the warning text. Optionally the format may contain $version, which will # be replaced by the version of the file (if it could be obtained via # FILE_VERSION_FILTER) # The default value is: $file:$line: $text. WARN_FORMAT = "$file:$line: $text" # The WARN_LOGFILE tag can be used to specify a file to which warning and error # messages should be written. If left blank the output is written to standard # error (stderr). WARN_LOGFILE = #--------------------------------------------------------------------------- # Configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag is used to specify the files and/or directories that contain # documented source files. You may enter file names like myfile.cpp or # directories like /usr/src/myproject. Separate the files or directories with # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING # Note: If this tag is empty the current directory is searched. INPUT = ../../thrust \ ../../thrust/async \ ../../thrust/iterator \ ../../thrust/mr \ ../../thrust/random \ ../../thrust/system # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses # libiconv (or the iconv built into libc) for the transcoding. See the libiconv # documentation (see: http://www.gnu.org/software/libiconv) for the list of # possible encodings. # The default value is: UTF-8. 
INPUT_ENCODING = UTF-8 # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and # *.h) to filter out the source-files in the directories. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # read by doxygen. # # If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, # *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, # *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, # *.m, *.markdown, *.md, *.mm, *.dox, *.py, *.f90, *.f, *.for, *.tcl, *.vhd, # *.vhdl, *.ucf, *.qsf, *.as and *.js. FILE_PATTERNS = # The RECURSIVE tag can be used to specify whether or not subdirectories should # be searched for input files as well. # The default value is: NO. RECURSIVE = NO # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. # # Note that relative paths are relative to the directory from which doxygen is # run. EXCLUDE = # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded # from the input. # The default value is: NO. EXCLUDE_SYMLINKS = NO # If the value of the INPUT tag contains directories, you can use the # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude # certain files from those directories. # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories for example use the pattern */test/* EXCLUDE_PATTERNS = # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) 
that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. Examples: ANamespace, AClass, # AClass::ANamespace, ANamespace::*Test # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories use the pattern */test/* EXCLUDE_SYMBOLS = # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include # command). EXAMPLE_PATH = # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and # *.h) to filter out the source-files in the directories. If left blank all # files are included. EXAMPLE_PATTERNS = * # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be # searched for input files to be used with the \include or \dontinclude commands # irrespective of the value of the RECURSIVE tag. # The default value is: NO. EXAMPLE_RECURSIVE = NO # The IMAGE_PATH tag can be used to specify one or more files or directories # that contain images that are to be included in the documentation (see the # \image command). IMAGE_PATH = # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program # by executing (via popen()) the command: # # <filter> <input-file> # # where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the # name of an input file. Doxygen will then use the output that the filter # program writes to standard output. If FILTER_PATTERNS is specified, this tag # will be ignored. # # Note that the filter must not add or remove lines; it is applied before the # code is scanned, but not when the output code is generated. If lines are added # or removed, the anchors will not be placed correctly.
INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. Doxygen will compare the file name with each pattern and apply the # filter if there is a match. The filters are a list of the form: pattern=filter # (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how # filters are used. If the FILTER_PATTERNS tag is empty or if none of the # patterns match the file name, INPUT_FILTER is applied. FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER) will also be used to filter the input files that are used for # producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). # The default value is: NO. FILTER_SOURCE_FILES = NO # The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file # pattern. A pattern will override the setting for FILTER_PATTERN (if any) and # it is also possible to disable source filtering for a specific pattern using # *.ext= (so without naming a filter). # This tag requires that the tag FILTER_SOURCE_FILES is set to YES. FILTER_SOURCE_PATTERNS = # If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that # is part of the input, its contents will be placed on the main page # (index.html). This can be useful if you have a project on for instance GitHub # and want to reuse the introduction page also for the doxygen output. USE_MDFILE_AS_MAINPAGE = ../README.md #--------------------------------------------------------------------------- # Configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will be # generated. Documented entities will be cross-referenced with these sources. # # Note: To get rid of all source code in the generated output, make sure that # also VERBATIM_HEADERS is set to NO. # The default value is: NO. 
SOURCE_BROWSER = NO # Setting the INLINE_SOURCES tag to YES will include the body of functions, # classes and enums directly into the documentation. # The default value is: NO. INLINE_SOURCES = NO # Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any # special comment blocks from generated source code fragments. Normal C, C++ and # Fortran comments will always remain visible. # The default value is: YES. STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES then for each documented # function all documented functions referencing it will be listed. # The default value is: NO. REFERENCED_BY_RELATION = NO # If the REFERENCES_RELATION tag is set to YES then for each documented function # all documented entities called/used by that function will be listed. # The default value is: NO. REFERENCES_RELATION = NO # If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set # to YES then the hyperlinks from functions in REFERENCES_RELATION and # REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will # link to the documentation. # The default value is: YES. REFERENCES_LINK_SOURCE = YES # If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the # source code will show a tooltip with additional information such as prototype, # brief description and links to the definition and documentation. Since this # will make the HTML file larger and loading of large files a bit slower, you # can opt to disable this feature. # The default value is: YES. # This tag requires that the tag SOURCE_BROWSER is set to YES. SOURCE_TOOLTIPS = YES # If the USE_HTAGS tag is set to YES then the references to source code will # point to the HTML generated by the htags(1) tool instead of doxygen built-in # source browser. The htags tool is part of GNU's global source tagging system # (see http://www.gnu.org/software/global/global.html). You will need version # 4.8.6 or higher. 
# # To use it do the following: # - Install the latest version of global # - Enable SOURCE_BROWSER and USE_HTAGS in the config file # - Make sure the INPUT points to the root of the source tree # - Run doxygen as normal # # Doxygen will invoke htags (and that will in turn invoke gtags), so these # tools must be available from the command line (i.e. in the search path). # # The result: instead of the source browser generated by doxygen, the links to # source code will now point to the output of htags. # The default value is: NO. # This tag requires that the tag SOURCE_BROWSER is set to YES. USE_HTAGS = NO # If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a # verbatim copy of the header file for each class for which an include is # specified. Set to NO to disable this. # See also: Section \class. # The default value is: YES. VERBATIM_HEADERS = YES # If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the # clang parser (see: http://clang.llvm.org/) for more accurate parsing at the # cost of reduced performance. This can be particularly helpful with template # rich C++ code for which doxygen's built-in parser lacks the necessary type # information. # Note: The availability of this option depends on whether or not doxygen was # compiled with the --with-libclang option. # The default value is: NO. CLANG_ASSISTED_PARSING = NO # If clang assisted parsing is enabled you can provide the compiler with command # line options that you would normally use when invoking the compiler. Note that # the include paths will already be set by doxygen for the files and directories # specified with INPUT and INCLUDE_PATH. # This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES.
CLANG_OPTIONS = #--------------------------------------------------------------------------- # Configuration options related to the alphabetical class index #--------------------------------------------------------------------------- # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all # compounds will be generated. Enable this if the project contains a lot of # classes, structs, unions or interfaces. # The default value is: YES. ALPHABETICAL_INDEX = YES # The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in # which the alphabetical index list will be split. # Minimum value: 1, maximum value: 20, default value: 5. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. COLS_IN_ALPHA_INDEX = 5 # In case all classes in a project start with a common prefix, all classes will # be put under the same header in the alphabetical index. The IGNORE_PREFIX tag # can be used to specify a prefix (or a list of prefixes) that should be ignored # while generating the index headers. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. IGNORE_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output # The default value is: YES. GENERATE_HTML = YES # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a # relative path is entered the value of OUTPUT_DIRECTORY will be put in front of # it. # The default directory is: html. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_OUTPUT = html # The HTML_FILE_EXTENSION tag can be used to specify the file extension for each # generated HTML page (for example: .htm, .php, .asp). # The default value is: .html. # This tag requires that the tag GENERATE_HTML is set to YES. 
HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a user-defined HTML header file for # each generated HTML page. If the tag is left blank doxygen will generate a # standard header. # # To get valid HTML the header file must include any scripts and style sheets # that doxygen needs, which is dependent on the configuration options used (e.g. # the setting GENERATE_TREEVIEW). It is highly recommended to start with a # default header using # doxygen -w html new_header.html new_footer.html new_stylesheet.css # YourConfigFile # and then modify the file new_header.html. See also section "Doxygen usage" # for information on how to generate the default header that doxygen normally # uses. # Note: The header is subject to change so you typically have to regenerate the # default header when upgrading to a newer version of doxygen. For a description # of the possible markers and block names see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_HEADER = # The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each # generated HTML page. If the tag is left blank doxygen will generate a standard # footer. See HTML_HEADER for more information on how to generate a default # footer and what special commands can be used inside the footer. See also # section "Doxygen usage" for information on how to generate the default footer # that doxygen normally uses. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading style # sheet that is used by each HTML page. It can be used to fine-tune the look of # the HTML output. If left blank doxygen will generate a default style sheet. # See also section "Doxygen usage" for information on how to generate the style # sheet that doxygen normally uses.
# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as # it is more robust and this tag (HTML_STYLESHEET) will in the future become # obsolete. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_STYLESHEET = # The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined # cascading style sheets that are included after the standard style sheets # created by doxygen. Using this option one can overrule certain style aspects. # This is preferred over using HTML_STYLESHEET since it does not replace the # standard style sheet and is therefore more robust against future updates. # Doxygen will copy the style sheet files to the output directory. # Note: The order of the extra style sheet files is of importance (e.g. the last # style sheet in the list overrules the setting of the previous ones in the # list). For an example see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_STYLESHEET = # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or # other source files which should be copied to the HTML output directory. Note # that these files will be copied to the base HTML output directory. Use the # $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these # files. In the HTML_STYLESHEET file, use the file name only. Also note that the # files will be copied as-is; there are no commands or markers available. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_FILES = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the style sheet and background images according to # this color. Hue is specified as an angle on a colorwheel, see # http://en.wikipedia.org/wiki/Hue for more information. For instance the value # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 # purple, and 360 is red again. 
# Minimum value: 0, maximum value: 359, default value: 220. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_HUE = 220 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors # in the HTML output. For a value of 0 the output will use grayscales only. A # value of 255 will produce the most vivid colors. # Minimum value: 0, maximum value: 255, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_SAT = 100 # The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the # luminance component of the colors in the HTML output. Values below 100 # gradually make the output lighter, whereas values above 100 make the output # darker. The value divided by 100 is the actual gamma applied, so 80 represents # a gamma of 0.8, the value 220 represents a gamma of 2.2, and 100 does not # change the gamma. # Minimum value: 40, maximum value: 240, default value: 80. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_GAMMA = 80 # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML # page will contain the date and time when the page was generated. Setting this # to YES can help to show when doxygen was last run and thus if the # documentation is up to date. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_TIMESTAMP = NO # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_DYNAMIC_SECTIONS = NO # With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries # shown in the various tree structured indices initially; the user can expand # and collapse entries dynamically later on.
Doxygen will expand the tree to # such a level that at most the specified number of entries are visible (unless # a fully collapsed tree already exceeds this amount). So setting the number of # entries to 1 will produce a fully collapsed tree by default. 0 is a special value # representing an infinite number of entries and will result in a fully expanded # tree by default. # Minimum value: 0, maximum value: 9999, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_INDEX_NUM_ENTRIES = 100 # If the GENERATE_DOCSET tag is set to YES, additional index files will be # generated that can be used as input for Apple's Xcode 3 integrated development # environment (see: http://developer.apple.com/tools/xcode/), introduced with # OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a # Makefile in the HTML output directory. Running make will produce the docset in # that directory and running make install will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at # startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html # for more information. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_DOCSET = NO # This tag determines the name of the docset feed. A documentation feed provides # an umbrella under which multiple documentation sets from a single provider # (such as a company or product suite) can be grouped. # The default value is: Doxygen generated docs. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_FEEDNAME = "Doxygen generated docs" # This tag specifies a string that should uniquely identify the documentation # set bundle. This should be a reverse domain-name style string, e.g. # com.mycompany.MyDocSet. Doxygen will append .docset to the name. # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_DOCSET is set to YES.
DOCSET_BUNDLE_ID = org.doxygen.Project # The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify # the documentation publisher. This should be a reverse domain-name style # string, e.g. com.mycompany.MyDocSet.documentation. # The default value is: org.doxygen.Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_PUBLISHER_ID = org.doxygen.Publisher # The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. # The default value is: Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three # additional HTML index files: index.hhp, index.hhc, and index.hhk. The # index.hhp is a project file that can be read by Microsoft's HTML Help Workshop # (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on # Windows. # # The HTML Help Workshop contains a compiler that can convert all HTML output # generated by doxygen into a single compiled HTML file (.chm). Compiled HTML # files are now used as the Windows 98 help format, and will replace the old # Windows help format (.hlp) on all Windows platforms in the future. Compressed # HTML files also contain an index, a table of contents, and you can search for # words in the documentation. The HTML workshop also contains a viewer for # compressed HTML files. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_HTMLHELP = NO # The CHM_FILE tag can be used to specify the file name of the resulting .chm # file. You can add a path in front of the file if the result should not be # written to the html output directory. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. CHM_FILE = # The HHC_LOCATION tag can be used to specify the location (absolute path # including file name) of the HTML help compiler (hhc.exe). 
If non-empty, # doxygen will try to run the HTML help compiler on the generated index.hhp. # The file has to be specified with full path. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. HHC_LOCATION = # The GENERATE_CHI flag controls if a separate .chi index file is generated # (YES) or that it should be included in the master .chm file (NO). # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. GENERATE_CHI = NO # The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc) # and project file content. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. CHM_INDEX_ENCODING = # The BINARY_TOC flag controls whether a binary table of contents is generated # (YES) or a normal table of contents (NO) in the .chm file. Furthermore it # enables the Previous and Next buttons. # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. BINARY_TOC = NO # The TOC_EXPAND flag can be set to YES to add extra items for group members to # the table of contents of the HTML help documentation and to the tree view. # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. TOC_EXPAND = NO # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and # QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that # can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help # (.qch) of the generated HTML documentation. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_QHP = NO # If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify # the file name of the resulting .qch file. The path specified is relative to # the HTML output folder. # This tag requires that the tag GENERATE_QHP is set to YES. QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help # Project output. 
For more information please see Qt Help Project / Namespace # (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace). # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_QHP is set to YES. QHP_NAMESPACE = org.doxygen.Project # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt # Help Project output. For more information please see Qt Help Project / Virtual # Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual- # folders). # The default value is: doc. # This tag requires that the tag GENERATE_QHP is set to YES. QHP_VIRTUAL_FOLDER = doc # If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom # filter to add. For more information please see Qt Help Project / Custom # Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- # filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_NAME = # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the # custom filter to add. For more information please see Qt Help Project / Custom # Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- # filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_ATTRS = # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this # project's filter section matches. Qt Help Project / Filter Attributes (see: # http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_SECT_FILTER_ATTRS = # The QHG_LOCATION tag can be used to specify the location of Qt's # qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the # generated .qhp file. # This tag requires that the tag GENERATE_QHP is set to YES. 
QHG_LOCATION = # If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be # generated; together with the HTML files, they form an Eclipse help plugin. To # install this plugin and make it available under the help contents menu in # Eclipse, the contents of the directory containing the HTML and XML files need # to be copied into the plugins directory of Eclipse. The name of the directory # within the plugins directory should be the same as the ECLIPSE_DOC_ID value. # After copying, Eclipse needs to be restarted before the help appears. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_ECLIPSEHELP = NO # A unique identifier for the Eclipse help plugin. When installing the plugin # the directory name containing the HTML and XML files should also have this # name. Each documentation set should have its own identifier. # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. ECLIPSE_DOC_ID = org.doxygen.Project # If you want full control over the layout of the generated HTML pages it might # be necessary to disable the index and replace it with your own. The # DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top # of each HTML page. A value of NO enables the index and the value YES disables # it. Since the tabs in the index contain the same information as the navigation # tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. DISABLE_INDEX = NO # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index # structure should be generated to display hierarchical information. If the tag # value is set to YES, a side panel will be generated containing a tree-like # index structure (just like the one that is generated for HTML Help).
For this # to work a browser that supports JavaScript, DHTML, CSS and frames is required # (i.e. any modern browser). Windows users are probably better off using the # HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can # further fine-tune the look of the index. As an example, the default style # sheet generated by doxygen has an example that shows how to put an image at # the root of the tree instead of the PROJECT_NAME. Since the tree basically has # the same information as the tab index, you could consider setting # DISABLE_INDEX to YES when enabling this option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_TREEVIEW = NO # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that # doxygen will group on one line in the generated HTML documentation. # # Note that a value of 0 will completely suppress the enum values from appearing # in the overview section. # Minimum value: 0, maximum value: 20, default value: 4. # This tag requires that the tag GENERATE_HTML is set to YES. ENUM_VALUES_PER_LINE = 1 # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used # to set the initial width (in pixels) of the frame in which the tree is shown. # Minimum value: 0, maximum value: 1500, default value: 250. # This tag requires that the tag GENERATE_HTML is set to YES. TREEVIEW_WIDTH = 250 # If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to # external symbols imported via tag files in a separate window. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. EXT_LINKS_IN_WINDOW = NO # Use this tag to change the font size of LaTeX formulas included as images in # the HTML documentation. When you change the font size after a successful # doxygen run you need to manually remove any form_*.png images from the HTML # output directory to force them to be regenerated. 
# Minimum value: 8, maximum value: 50, default value: 10. # This tag requires that the tag GENERATE_HTML is set to YES. FORMULA_FONTSIZE = 10 # Use the FORMULA_TRANSPARENT tag to determine whether or not the images # generated for formulas are transparent PNGs. Transparent PNGs are not # supported properly for IE 6.0, but are supported on all modern browsers. # # Note that when changing this option you need to delete any form_*.png files in # the HTML output directory before the changes have effect. # The default value is: YES. # This tag requires that the tag GENERATE_HTML is set to YES. FORMULA_TRANSPARENT = YES # Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see # http://www.mathjax.org) which uses client side Javascript for the rendering # instead of using pre-rendered bitmaps. Use this if you do not have LaTeX # installed or if you want formulas to look prettier in the HTML output. When # enabled you may also need to install MathJax separately and configure the path # to it using the MATHJAX_RELPATH option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. USE_MATHJAX = YES # When MathJax is enabled you can set the default output format to be used for # the MathJax output. See the MathJax site (see: # http://docs.mathjax.org/en/latest/output.html) for more details. # Possible values are: HTML-CSS (which is slower, but has the best # compatibility), NativeMML (i.e. MathML) and SVG. # The default value is: HTML-CSS. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_FORMAT = HTML-CSS # When MathJax is enabled you need to specify the location relative to the HTML # output directory using the MATHJAX_RELPATH option. The destination directory # should contain the MathJax.js script. For instance, if the mathjax directory # is located at the same level as the HTML output directory, then # MATHJAX_RELPATH should be ../mathjax.
The default value points to the MathJax # Content Delivery Network so you can quickly see the result without installing # MathJax. However, it is strongly recommended to install a local copy of # MathJax from http://www.mathjax.org before deployment. # The default value is: http://cdn.mathjax.org/mathjax/latest. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax # extension names that should be enabled during MathJax rendering. For example # MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_EXTENSIONS = # The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces # of code that will be used on startup of the MathJax code. See the MathJax site # (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an # example see the documentation. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_CODEFILE = # When the SEARCHENGINE tag is enabled doxygen will generate a search box for # the HTML output. The underlying search engine uses javascript and DHTML and # should work on any modern browser. Note that when using HTML help # (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) # there is already a search function so this one should typically be disabled. # For large projects the javascript based search engine can be slow, then # enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to # search using the keyboard; to jump to the search box use + S # (what the is depends on the OS and browser, but it is typically # , /