pax_global_header00006660000000000000000000000064144734267740014534gustar00rootroot0000000000000052 comment=aadb6e35e8968a9ef1728b0f9dfed1901536e029 rocSPARSE-rocm-5.7.1/000077500000000000000000000000001447342677400142055ustar00rootroot00000000000000rocSPARSE-rocm-5.7.1/.clang-format000066400000000000000000000065421447342677400165670ustar00rootroot00000000000000# Style file for MLSE Libraries based on the modified rocBLAS style # Common settings BasedOnStyle: WebKit TabWidth: 4 IndentWidth: 4 UseTab: Never ColumnLimit: 100 # Other languages JavaScript, Proto --- Language: Cpp # http://releases.llvm.org/6.0.1/tools/clang/docs/ClangFormatStyleOptions.html#disabling-formatting-on-a-piece-of-code # int formatted_code; # // clang-format off # void unformatted_code ; # // clang-format on # void formatted_code_again; DisableFormat: false Standard: Cpp11 AccessModifierOffset: -4 AlignAfterOpenBracket: Align AlignConsecutiveAssignments: true AlignConsecutiveDeclarations: true AlignEscapedNewlines: Left AlignOperands: true AlignTrailingComments: false AllowAllArgumentsOnNextLine: true AllowAllConstructorInitializersOnNextLine: true AllowAllParametersOfDeclarationOnNextLine: true AllowShortBlocksOnASingleLine: false AllowShortCaseLabelsOnASingleLine: false AllowShortFunctionsOnASingleLine: Empty AllowShortIfStatementsOnASingleLine: false AllowShortLoopsOnASingleLine: false AlwaysBreakAfterDefinitionReturnType: false AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: false AlwaysBreakTemplateDeclarations: true BinPackArguments: false BinPackParameters: false # Configure each individual brace in BraceWrapping BreakBeforeBraces: Custom # Control of individual brace wrapping cases BraceWrapping: { AfterCaseLabel: 'true' AfterClass: 'true' AfterControlStatement: 'true' AfterEnum : 'true' AfterFunction : 'true' AfterNamespace : 'true' AfterStruct : 'true' AfterUnion : 'true' BeforeCatch : 'true' BeforeElse : 'true' IndentBraces : 'false' # AfterExternBlock : 'true' 
} #BreakAfterJavaFieldAnnotations: true #BreakBeforeInheritanceComma: false #BreakBeforeBinaryOperators: None #BreakBeforeTernaryOperators: true #BreakConstructorInitializersBeforeComma: true #BreakStringLiterals: true CommentPragmas: '^ IWYU pragma:' #CompactNamespaces: false ConstructorInitializerAllOnOneLineOrOnePerLine: false ConstructorInitializerIndentWidth: 4 ContinuationIndentWidth: 4 Cpp11BracedListStyle: true SpaceBeforeCpp11BracedList: false DerivePointerAlignment: false ExperimentalAutoDetectBinPacking: false ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] IndentCaseLabels: false IndentPPDirectives: None #FixNamespaceComments: true IndentWrappedFunctionNames: true KeepEmptyLinesAtTheStartOfBlocks: true MacroBlockBegin: '' MacroBlockEnd: '' #JavaScriptQuotes: Double MaxEmptyLinesToKeep: 1 NamespaceIndentation: All ObjCBlockIndentWidth: 4 #ObjCSpaceAfterProperty: true #ObjCSpaceBeforeProtocolList: true PenaltyBreakBeforeFirstCallParameter: 19 PenaltyBreakComment: 300 PenaltyBreakFirstLessLess: 120 PenaltyBreakString: 1000 PenaltyExcessCharacter: 1000000 PenaltyReturnTypeOnItsOwnLine: 60 PointerAlignment: Left SpaceAfterCStyleCast: false SpaceBeforeAssignmentOperators: true SpaceBeforeParens: Never SpaceInEmptyBlock: false SpaceInEmptyParentheses: false SpacesBeforeTrailingComments: 1 SpacesInAngles: false SpacesInContainerLiterals: true SpacesInCStyleCastParentheses: false SpacesInParentheses: false SpacesInSquareBrackets: false #SpaceAfterTemplateKeyword: true #SpaceBeforeInheritanceColon: true #SortUsingDeclarations: true SortIncludes: true # Comments are for developers, they should arrange them ReflowComments: false #IncludeBlocks: Preserve --- rocSPARSE-rocm-5.7.1/.githooks/000077500000000000000000000000001447342677400161125ustar00rootroot00000000000000rocSPARSE-rocm-5.7.1/.githooks/install000077500000000000000000000002231447342677400175030ustar00rootroot00000000000000#!/usr/bin/env bash cd $(git rev-parse --git-dir) cd hooks echo "Installing 
hooks..." ln -fs ../../.githooks/pre-commit pre-commit echo "Done!" rocSPARSE-rocm-5.7.1/.githooks/pre-commit000077500000000000000000000041201447342677400201110ustar00rootroot00000000000000#!/bin/bash # # This pre-commit hook checks if any versions of clang-format # are installed, and if so, uses the installed version to format # the staged changes. export PATH=/usr/bin:/bin set -x format=/opt/rocm/llvm/bin/clang-format # Redirect stdout to stderr. exec >&2 # Do everything from top - level cd $(git rev-parse --show-toplevel) if git rev-parse --verify HEAD >/dev/null 2>&1; then against=HEAD else # Initial commit: diff against an empty tree object against=4b825dc642cb6eb9a060e54bf8d69288fbee4904 fi if [[ "$1" == "--reformat" ]]; then files=$(git ls-files --exclude-standard) else files=$(git diff-index --cached --name-only $against) fi [[ -z "$files" ]] && exit # Change the copyright date at the top of any text files for file in $files; do if [[ -e $file ]]; then /usr/bin/perl -pi -e 'INIT { exit 1 if !-f $ARGV[0] || -B $ARGV[0]; $year = (localtime)[5] + 1900 } s/^([*\/#[:space:]]*)Copyright\s+(?:\(C\)\s*)?(\d+)(?:\s*-\s*\d+)?/qq($1Copyright (C) $2@{[$year != $2 ? "-$year" : ""]})/ie if $. 
< 10' "$file" && git add -u "$file" fi done # do the formatting for file in $files; do if [[ -e $file ]] && echo $file | grep -Eq '\.c$|\.h$|\.hpp$|\.cpp$|\.cl$|\.in$|\.txt$|\.yaml$|\.sh$|\.py$|\.pl$|\.cmake$|\.md$|\.rst$|\.groovy$'; then sed -i -e 's/[[:space:]]*$//' "$file" # Remove whitespace at end of lines sed -i -e '$a\' "$file" # Add missing newline to end of file # Convert UTF8 non-ASCII to ASCII temp=$(mktemp) [[ -w $temp ]] || exit iconv -s -f utf-8 -t ascii//TRANSLIT "$file" > "$temp" || exit chmod --reference="$file" "$temp" || exit mv -f "$temp" "$file" || exit git add -u "$file" fi done # if clang-format exists, run it on C/C++ files if [[ -x $format ]]; then for file in $files; do if [[ -e $file ]] && echo $file | grep -Eq '\.c$|\.h$|\.hpp$|\.cpp$|\.cl$|\.h\.in$|\.hpp\.in$|\.cpp\.in$'; then echo "$format $file" "$format" -i -style=file "$file" git add -u "$file" fi done fi rocSPARSE-rocm-5.7.1/.github/000077500000000000000000000000001447342677400155455ustar00rootroot00000000000000rocSPARSE-rocm-5.7.1/.github/BUG_REPORT.md000066400000000000000000000004531447342677400176010ustar00rootroot00000000000000### What is the expected behavior - ### What actually happens - ### How to reproduce - ### Environment | Hardware | description | |-----|-----| | GPU | device string | | CPU | device string | | Software | version | |-----|-----| | ROCK | v0.0 | | ROCR | v0.0 | | HCC | v0.0 | | Library | v0.0 | rocSPARSE-rocm-5.7.1/.github/CODEOWNERS000066400000000000000000000000451447342677400171370ustar00rootroot00000000000000* @ntrost57 @YvanMokwinski @jsandham rocSPARSE-rocm-5.7.1/.github/CONTRIBUTING.md000066400000000000000000000040061447342677400177760ustar00rootroot00000000000000## Contribution License Agreement 1. The code I am contributing is mine, and I have the right to license it. 2. By submitting a pull request for this project I am granting you a license to distribute said code under the MIT License for the project. 
## How to contribute Our code contribution guidelines closely follow the model of [GitHub pull-requests](https://help.github.com/articles/using-pull-requests/). This repository follows the [git flow](http://nvie.com/posts/a-successful-git-branching-model/) workflow, which dictates a /master branch where releases are cut, and a /develop branch which serves as an integration branch for new code. * A [git extension](https://github.com/nvie/gitflow) has been developed to ease the use of the 'git flow' methodology, but requires manual installation by the user. Refer to the project's wiki ## Pull-request guidelines * target the **develop** branch for integration * ensure code builds successfully. * do not break existing test cases * new functionality will only be merged with new unit tests * new unit tests should integrate within the existing [googletest framework](https://github.com/google/googletest/blob/master/googletest/docs/primer.md) * tests must have good code coverage * code must also have benchmark tests, and performance must approach the compute bound limit or memory bound limit. ## Format C and C++ code is formatted using `clang-format`. Use the clang-format version for Clang 9, which is available in the `/opt/rocm` directory. Please do not use your system's built-in `clang-format`, as this is an older version that will produce different results. 
To format a file, use: ``` /opt/rocm/hcc/bin/clang-format -style=file -i ``` To format all files, run the following script in rocSPARSE directory: ``` #!/bin/bash git ls-files -z *.cc *.cpp *.h *.hpp *.cl *.h.in *.hpp.in *.cpp.in | xargs -0 /opt/rocm/hcc/bin/clang-format -style=file -i ``` Also, githooks can be installed to format the code per-commit: ``` ./.githooks/install ``` rocSPARSE-rocm-5.7.1/.github/ISSUE_TEMPLATE.md000066400000000000000000000004531447342677400202540ustar00rootroot00000000000000### What is the expected behavior - ### What actually happens - ### How to reproduce - ### Environment | Hardware | description | |-----|-----| | GPU | device string | | CPU | device string | | Software | version | |-----|-----| | ROCK | v0.0 | | ROCR | v0.0 | | HCC | v0.0 | | Library | v0.0 | rocSPARSE-rocm-5.7.1/.github/PULL_REQUEST_TEMPLATE.md000066400000000000000000000000621447342677400213440ustar00rootroot00000000000000resolves #___ Summary of proposed changes: - - - rocSPARSE-rocm-5.7.1/.github/workflows/000077500000000000000000000000001447342677400176025ustar00rootroot00000000000000rocSPARSE-rocm-5.7.1/.github/workflows/docs.yaml000066400000000000000000000044631447342677400214250ustar00rootroot00000000000000name: Upload to the upload server # Controls when the workflow will run on: push: branches: [develop, master] tags: - rocm-5.* release: types: [published] # Allows you to run this workflow manually from the Actions tab workflow_dispatch: # A workflow run is made up of one or more jobs that can run sequentially or in parallel jobs: # This workflow contains a single job called "build" build: # The type of runner that the job will run on runs-on: ubuntu-latest # Steps represent a sequence of tasks that will be executed as part of the job steps: # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - uses: actions/checkout@v2 - name: getting branch name shell: bash run: echo "##[set-output name=branch;]$(echo 
${GITHUB_REF#refs/heads/})" id: branch_name - name: getting tag name shell: bash run: echo "##[set-output name=tag;]$(echo ${GITHUB_REF_NAME})" id: tag_name - name: zipping files run: zip -r ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip . -x '*.git*' '*.idea*' - name: echo-step run: echo "${{ github.event.release.target_commitish }}" - name: uploading archive to prod if: ${{ steps.branch_name.outputs.branch == 'master' || github.event.release.target_commitish == 'master'}} uses: wlixcc/SFTP-Deploy-Action@v1.0 with: username: ${{ secrets.USERNAME }} server: ${{ secrets.SERVER }} ssh_private_key: ${{ secrets.SSH_PRIVATE_KEY }} local_path: ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip remote_path: '${{ secrets.PROD_UPLOAD_URL }}' args: '-o ConnectTimeout=5' - name: uploading archive to staging if: ${{ steps.branch_name.outputs.branch == 'develop' || github.event.release.target_commitish == 'develop' }} uses: wlixcc/SFTP-Deploy-Action@v1.0 with: username: ${{ secrets.USERNAME }} server: ${{ secrets.SERVER }} ssh_private_key: ${{ secrets.SSH_PRIVATE_KEY }} local_path: ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip remote_path: '${{ secrets.STG_UPLOAD_URL }}' args: '-o ConnectTimeout=5' rocSPARSE-rocm-5.7.1/.gitignore000066400000000000000000000005751447342677400162040ustar00rootroot00000000000000# Compiled Object files *.slo *.lo *.o *.obj # Precompiled Headers *.gch *.pch # Compiled Dynamic libraries *.so *.dylib *.dll # Fortran module files *.mod # Compiled Static libraries *.lai *.la *.a *.lib # Executables *.exe *.out *.app # vim tags tags .tags .*.swp # Editors .vscode # build-in-source directory build # doc directory docBin _build # matrices *.csr *.mtx 
rocSPARSE-rocm-5.7.1/.jenkins/000077500000000000000000000000001447342677400157245ustar00rootroot00000000000000rocSPARSE-rocm-5.7.1/.jenkins/codecov.groovy000066400000000000000000000052011447342677400206130ustar00rootroot00000000000000#!/usr/bin/env groovy // This shared library is available at https://github.com/ROCmSoftwarePlatform/rocJENKINS/ @Library('rocJenkins@pong') _ // This is file for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. import com.amd.project.* import com.amd.docker.* import java.nio.file.Path def runCI = { nodeDetails, jobName-> def prj = new rocProject('rocSPARSE', 'CodeCov') // customize for project prj.paths.build_command = './install.sh --matrices-dir-install ${JENKINS_HOME_DIR}/rocsparse_matrices && ./install.sh -kc --codecoverage --matrices-dir ${JENKINS_HOME_DIR}/rocsparse_matrices' prj.libraryDependencies = ['rocPRIM'] prj.defaults.ccache = false // Define test architectures, optional rocm version argument is available def nodes = new dockerNodes(nodeDetails, jobName, prj) def commonGroovy boolean formatCheck = false def compileCommand = { platform, project-> commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" commonGroovy.runCompileCommand(platform, project, jobName) } def testCommand = { platform, project-> def gfilter = "*pre_checkin*" commonGroovy.runCoverageCommand(platform, project, gfilter, "release-debug") } buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, null) } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) def propertyList = ["compute-rocm-dkms-no-npi":[pipelineTriggers([cron('0 1 * * 6')])], "compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 6')])] ] propertyList = auxiliary.appendPropertyList(propertyList) def jobNameList = ["compute-rocm-dkms-no-npi":([ubuntu18:['gfx900']]), "compute-rocm-dkms-no-npi-hipclang":([ubuntu18:['gfx900']])] jobNameList = 
auxiliary.appendJobNameList(jobNameList) propertyList.each { jobName, property-> if (urlJobName == jobName) properties(auxiliary.addCommonProperties(property)) } jobNameList.each { jobName, nodeDetails-> if (urlJobName == jobName) stage(jobName) { runCI(nodeDetails, jobName) } } // For url job names that are not listed by the jobNameList i.e. compute-rocm-dkms-no-npi-1901 if(!jobNameList.keySet().contains(urlJobName)) { properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) stage(urlJobName) { runCI([ubuntu18:['gfx900']], urlJobName) } } } rocSPARSE-rocm-5.7.1/.jenkins/common.groovy000066400000000000000000000132171447342677400204670ustar00rootroot00000000000000// This file is for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. def runCompileCommand(platform, project, jobName, boolean sameOrg=false) { project.paths.construct_build_prefix() String compiler = jobName.contains('hipclang') ? 'hipcc' : 'hcc' String hipClangArgs = jobName.contains('hipclang') ? ' --hip-clang' : '' //Temporary workaround due to bug in container String centos7Workaround = platform.jenkinsLabel.contains('centos7') ? 'export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/opt/rocm/lib64/' : '' def getDependenciesCommand = "" if (project.installLibraryDependenciesFromCI) { project.libraryDependencies.each { libraryName -> getDependenciesCommand += auxiliary.getLibrary(libraryName, platform.jenkinsLabel, null, sameOrg) } } def command = """#!/usr/bin/env bash set -x cd ${project.paths.project_build_prefix} ${getDependenciesCommand} export LD_LIBRARY_PATH=/opt/rocm/lib/ ${centos7Workaround} CXX=/opt/rocm/bin/${compiler} ${project.paths.build_command} ${hipClangArgs} """ platform.runCommand(this, command) } def runTestCommand (platform, project, gfilter, String dirmode = "release") { //Temporary workaround due to bug in container String centos7Workaround = platform.jenkinsLabel.contains('centos7') ? 
'export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/opt/rocm/lib64/' : '' def hmmTestCommand= '' if (platform.jenkinsLabel.contains('gfx90a')) { hmmTestCommand = """ ROCSPARSE_MALLOC_MANAGED=1 GTEST_LISTENER=NO_PASS_LINE_IN_LOG ./rocsparse-test --gtest_output=xml:test_detail_hmm_xnack_off.xml --gtest_color=yes --gtest_filter=*csrmv_managed* HSA_XNACK=1 ROCSPARSE_MALLOC_MANAGED=1 GTEST_LISTENER=NO_PASS_LINE_IN_LOG ./rocsparse-test --gtest_output=xml:test_detail_hmm_xnack_on.xml --gtest_color=yes --gtest_filter=*csrmv_managed* """ } def command = """#!/usr/bin/env bash set -ex cd ${project.paths.project_build_prefix}/build/${dirmode}/clients/staging export LD_LIBRARY_PATH=/opt/rocm/lib/ ${centos7Workaround} GTEST_LISTENER=NO_PASS_LINE_IN_LOG ./rocsparse-test --gtest_output=xml --gtest_color=yes --gtest_filter=${gfilter}-*known_bug* ${hmmTestCommand} """ platform.runCommand(this, command) junit "${project.paths.project_build_prefix}/build/release/clients/staging/*.xml" } def runTestWithSanitizerCommand (platform, project, gfilter, String dirmode = "release") { //Temporary workaround due to bug in container String centos7Workaround = platform.jenkinsLabel.contains('centos7') ? 
'export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/opt/rocm/lib64/' : '' def command = """#!/usr/bin/env bash set -x cd ${project.paths.project_build_prefix}/build/${dirmode}/clients/staging export ASAN_LIB_PATH=\$(/opt/rocm/llvm/bin/clang -print-file-name=libclang_rt.asan-x86_64.so) export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:\$(dirname "\${ASAN_LIB_PATH}") ${centos7Workaround} GTEST_LISTENER=NO_PASS_LINE_IN_LOG ASAN_SYMBOLIZER_PATH=/opt/rocm/llvm/bin/llvm-symbolizer ASAN_OPTIONS=detect_leaks=1 LSAN_OPTIONS=suppressions=../../../../suppr.txt ./rocsparse-test --gtest_output=xml --gtest_color=yes --gtest_filter=${gfilter}-*known_bug* """ platform.runCommand(this, command) junit "${project.paths.project_build_prefix}/build/release/clients/staging/*.xml" } def runCoverageCommand (platform, project, gfilter, String dirmode = "release") { //Temporary workaround due to bug in container String centos7Workaround = platform.jenkinsLabel.contains('centos7') ? 'export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/opt/rocm/lib64/' : '' def command = """#!/usr/bin/env bash set -x cd ${project.paths.project_build_prefix}/build/${dirmode} export LD_LIBRARY_PATH=/opt/rocm/lib/ ${centos7Workaround} GTEST_LISTENER=NO_PASS_LINE_IN_LOG make coverage_cleanup coverage GTEST_FILTER=${gfilter}-*known_bug* """ platform.runCommand(this, command) publishHTML([allowMissing: false, alwaysLinkToLastBuild: false, keepAll: false, reportDir: "${project.paths.project_build_prefix}/build/${dirmode}/lcoverage", reportFiles: "index.html", reportName: "Code coverage report", reportTitles: "Code coverage report"]) } def runPackageCommand(platform, project, String dirmode = "release") { def command String pkgType String pkgInfoCommand if(platform.jenkinsLabel.contains('centos') || platform.jenkinsLabel.contains('sles') || platform.jenkinsLabel.contains('rhel') || platform.jenkinsLabel.contains('cs9')) { pkgType = "rpm" pkgInfoCommand = "rpm -qlp package/*.rpm" } else { pkgType = "deb" pkgInfoCommand = "for pkg in 
package/*.deb; do dpkg -I \$pkg; dpkg -c \$pkg; done" } command = """ set -x cd ${project.paths.project_build_prefix}/build/${dirmode} make package mkdir -p package mv *.${pkgType} package/ ${pkgInfoCommand} """ platform.runCommand(this, command) platform.archiveArtifacts(this, """${project.paths.project_build_prefix}/build/${dirmode}/package/*.${pkgType}""") } return this rocSPARSE-rocm-5.7.1/.jenkins/debug.groovy000066400000000000000000000041201447342677400202560ustar00rootroot00000000000000#!/usr/bin/env groovy // This shared library is available at https://github.com/ROCmSoftwarePlatform/rocJENKINS/ @Library('rocJenkins@pong') _ // This is file for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. import com.amd.project.* import com.amd.docker.* import java.nio.file.Path def runCI = { nodeDetails, jobName-> def prj = new rocProject('rocSPARSE', 'Debug') // customize for project prj.paths.build_command = './install.sh --matrices-dir-install ${JENKINS_HOME_DIR}/rocsparse_matrices && ./install.sh -c -g --matrices-dir ${JENKINS_HOME_DIR}/rocsparse_matrices' prj.libraryDependencies = ['rocPRIM'] prj.defaults.ccache = true // Define test architectures, optional rocm version argument is available def nodes = new dockerNodes(nodeDetails, jobName, prj) def commonGroovy boolean formatCheck = false def compileCommand = { platform, project-> commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" commonGroovy.runCompileCommand(platform, project, jobName) } buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, null, null) } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) def propertyList = ["compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 6')])]] propertyList = auxiliary.appendPropertyList(propertyList) def jobNameList = ["compute-rocm-dkms-no-npi-hipclang":([ubuntu18:['any']])] jobNameList = auxiliary.appendJobNameList(jobNameList) 
jobNameList.each { jobName, nodeDetails-> if (urlJobName == jobName) stage(jobName) { runCI(nodeDetails, jobName) } } // For url job names that are not listed by the jobNameList i.e. compute-rocm-dkms-no-npi-1901 if(!jobNameList.keySet().contains(urlJobName)) { properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) stage(urlJobName) { runCI([ubuntu18:['gfx900', 'gfx906']], urlJobName) } } } rocSPARSE-rocm-5.7.1/.jenkins/extended.groovy000066400000000000000000000057631447342677400210060ustar00rootroot00000000000000#!/usr/bin/env groovy // This shared library is available at https://github.com/ROCmSoftwarePlatform/rocJENKINS/ @Library('rocJenkins@pong') _ // This is file for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. import com.amd.project.* import com.amd.docker.* import java.nio.file.Path def runCI = { nodeDetails, jobName-> def prj = new rocProject('rocSPARSE', 'Extended') // customize for project prj.paths.build_command = './install.sh --matrices-dir-install ${JENKINS_HOME_DIR}/rocsparse_matrices && ./install.sh -c --matrices-dir ${JENKINS_HOME_DIR}/rocsparse_matrices' prj.libraryDependencies = ['rocPRIM'] prj.timeout.test = 600 prj.defaults.ccache = true // Define test architectures, optional rocm version argument is available def nodes = new dockerNodes(nodeDetails, jobName, prj) def commonGroovy boolean formatCheck = false def compileCommand = { platform, project-> commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" commonGroovy.runCompileCommand(platform, project, jobName) } def testCommand = { platform, project-> def gfilter = "*nightly*" commonGroovy.runTestCommand(platform, project, gfilter) } def packageCommand = { platform, project-> commonGroovy.runPackageCommand(platform, project) } buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, packageCommand) } ci: { String urlJobName = 
auxiliary.getTopJobName(env.BUILD_URL) def propertyList = ["compute-rocm-dkms-no-npi":[pipelineTriggers([cron('0 1 * * 6')])], "compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 6')])], "rocm-docker":[]] propertyList = auxiliary.appendPropertyList(propertyList) def jobNameList = ["compute-rocm-dkms-no-npi":([ubuntu18:['gfx900'],centos7:['gfx908'],sles15sp1:['gfx906']]), "compute-rocm-dkms-no-npi-hipclang":([ubuntu18:['gfx900'],centos7:['gfx908'],centos8:['any'],sles15sp1:['gfx906']]), "rocm-docker":([ubuntu18:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']])] jobNameList = auxiliary.appendJobNameList(jobNameList) propertyList.each { jobName, property-> if (urlJobName == jobName) properties(auxiliary.addCommonProperties(property)) } jobNameList.each { jobName, nodeDetails-> if (urlJobName == jobName) stage(jobName) { runCI(nodeDetails, jobName) } } // For url job names that are not listed by the jobNameList i.e. compute-rocm-dkms-no-npi-1901 if(!jobNameList.keySet().contains(urlJobName)) { properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) stage(urlJobName) { runCI([ubuntu18:['gfx900', 'gfx906']], urlJobName) } } } rocSPARSE-rocm-5.7.1/.jenkins/precheckin.groovy000066400000000000000000000052121447342677400213060ustar00rootroot00000000000000#!/usr/bin/env groovy // This shared library is available at https://github.com/ROCmSoftwarePlatform/rocJENKINS/ @Library('rocJenkins@pong') _ // This is file for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. 
import com.amd.project.* import com.amd.docker.* import java.nio.file.Path def runCI = { nodeDetails, jobName-> def prj = new rocProject('rocSPARSE', 'PreCheckin') // customize for project prj.paths.build_command = './install.sh --matrices-dir-install ${JENKINS_HOME_DIR}/rocsparse_matrices && ./install.sh -c --matrices-dir ${JENKINS_HOME_DIR}/rocsparse_matrices' prj.libraryDependencies = ['rocPRIM'] prj.defaults.ccache = true // Define test architectures, optional rocm version argument is available def nodes = new dockerNodes(nodeDetails, jobName, prj) def commonGroovy boolean formatCheck = false def compileCommand = { platform, project-> commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" commonGroovy.runCompileCommand(platform, project, jobName) } def testCommand = { platform, project-> def gfilter = "*quick*:*pre_checkin*" commonGroovy.runTestCommand(platform, project, gfilter) } def packageCommand = { platform, project-> commonGroovy.runPackageCommand(platform, project) } buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, packageCommand) } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) def propertyList = ["compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 6')])]] propertyList = auxiliary.appendPropertyList(propertyList) def jobNameList = ["compute-rocm-dkms-no-npi-hipclang":([ubuntu18:['gfx900'],centos7:['gfx906'],centos8:['gfx906'],sles15sp1:['gfx908']])] jobNameList = auxiliary.appendJobNameList(jobNameList) propertyList.each { jobName, property-> if (urlJobName == jobName) properties(auxiliary.addCommonProperties(property)) } jobNameList.each { jobName, nodeDetails-> if (urlJobName == jobName) stage(jobName) { runCI(nodeDetails, jobName) } } // For url job names that are not listed by the jobNameList i.e. 
compute-rocm-dkms-no-npi-1901 if(!jobNameList.keySet().contains(urlJobName)) { properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) stage(urlJobName) { runCI([ubuntu18:['gfx900', 'gfx906']], urlJobName) } } } rocSPARSE-rocm-5.7.1/.jenkins/sanitizer.groovy000066400000000000000000000057171447342677400212150ustar00rootroot00000000000000#!/usr/bin/env groovy // This shared library is available at https://github.com/ROCmSoftwarePlatform/rocJENKINS/ @Library('rocJenkins@pong') _ // This is file for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. import com.amd.project.* import com.amd.docker.* import java.nio.file.Path def runCI = { nodeDetails, jobName-> def prj = new rocProject('rocSPARSE', 'PreCheckin') // customize for project prj.paths.build_command = './install.sh --matrices-dir-install ${JENKINS_HOME_DIR}/rocsparse_matrices && ./install.sh -c --address-sanitizer --matrices-dir ${JENKINS_HOME_DIR}/rocsparse_matrices' prj.libraryDependencies = ['rocPRIM'] prj.defaults.ccache = true // Define test architectures, optional rocm version argument is available def nodes = new dockerNodes(nodeDetails, jobName, prj) def commonGroovy boolean formatCheck = false def compileCommand = { platform, project-> commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" commonGroovy.runCompileCommand(platform, project, jobName) } def testCommand = { platform, project-> def gfilter = "*quick*:*pre_checkin*" commonGroovy.runTestWithSanitizerCommand(platform, project, gfilter) } def packageCommand = { platform, project-> commonGroovy.runPackageCommand(platform, project) } buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, packageCommand) } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) def propertyList = ["compute-rocm-dkms-no-npi":[pipelineTriggers([cron('0 1 * * 6')])], 
"compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 6')])], "rocm-docker":[]] propertyList = auxiliary.appendPropertyList(propertyList) def jobNameList = ["compute-rocm-dkms-no-npi":([ubuntu18:['gfx900'],centos7:['gfx908'],sles15sp1:['gfx906']]), "compute-rocm-dkms-no-npi-hipclang":([ubuntu18:['gfx900']]), "rocm-docker":([ubuntu18:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']])] jobNameList = auxiliary.appendJobNameList(jobNameList) propertyList.each { jobName, property-> if (urlJobName == jobName) properties(auxiliary.addCommonProperties(property)) } jobNameList.each { jobName, nodeDetails-> if (urlJobName == jobName) stage(jobName) { runCI(nodeDetails, jobName) } } // For url job names that are not listed by the jobNameList i.e. compute-rocm-dkms-no-npi-1901 if(!jobNameList.keySet().contains(urlJobName)) { properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) stage(urlJobName) { runCI([ubuntu18:['gfx900', 'gfx906']], urlJobName) } } } rocSPARSE-rocm-5.7.1/.jenkins/staticanalysis.groovy000066400000000000000000000017371447342677400222360ustar00rootroot00000000000000#!/usr/bin/env groovy // This shared library is available at https://github.com/ROCmSoftwarePlatform/rocJENKINS/ @Library('rocJenkins@pong') _ // This is file for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. 
import com.amd.project.* import com.amd.docker.* import java.nio.file.Path def runCI = { nodeDetails, jobName-> def prj = new rocProject('rocSPARSE', 'StaticAnalysis') // Define test architectures, optional rocm version argument is available def nodes = new dockerNodes(nodeDetails, jobName, prj) boolean formatCheck = true boolean staticAnalysis = true buildProject(prj, formatCheck, nodes.dockerArray, null, null, null, staticAnalysis) } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * 6')])])) stage(urlJobName) { runCI([ubuntu20:['cpu']], urlJobName) } } rocSPARSE-rocm-5.7.1/.jenkins/staticlibrary.groovy000066400000000000000000000057521447342677400220600ustar00rootroot00000000000000#!/usr/bin/env groovy // This shared library is available at https://github.com/ROCmSoftwarePlatform/rocJENKINS/ @Library('rocJenkins@pong') _ // This is file for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. 
import com.amd.project.*
import com.amd.docker.*
import java.nio.file.Path

// Builds, tests and packages rocSPARSE as a static library.
//   nodeDetails - map of OS label to a list of GPU architectures, e.g. [ubuntu18:['gfx900']]
//   jobName     - Jenkins job name; forwarded to dockerNodes and to the compile step
def runCI =
{
    nodeDetails, jobName->

    def prj = new rocProject('rocSPARSE', 'Static Library PreCheckin')

    // customize for project
    // Single-quoted Groovy string: ${JENKINS_HOME_DIR} is NOT interpolated by Groovy and is
    // passed through literally (presumably expanded later by the shell - confirm).
    // First install.sh call installs the test matrices, second one builds clients (-c) statically.
    prj.paths.build_command = './install.sh --matrices-dir-install ${JENKINS_HOME_DIR}/rocsparse_matrices && ./install.sh -c --static --matrices-dir ${JENKINS_HOME_DIR}/rocsparse_matrices'
    prj.libraryDependencies = ['rocPRIM']
    prj.defaults.ccache = true

    // Define test architectures, optional rocm version argument is available
    def nodes = new dockerNodes(nodeDetails, jobName, prj)

    // Shared helper script. It is only assigned inside compileCommand, so testCommand and
    // packageCommand rely on compileCommand having run first
    // (NOTE(review): presumably guaranteed by buildProject - confirm).
    def commonGroovy

    boolean formatCheck = false

    def compileCommand =
    {
        platform, project->

        commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy"
        // NOTE(review): the trailing 'true' presumably selects the static-library build;
        // verify against runCompileCommand in common.groovy.
        commonGroovy.runCompileCommand(platform, project, jobName, true)
    }

    def testCommand =
    {
        platform, project->

        // Test-name filter handed to runTestCommand: quick and pre_checkin suites only
        def gfilter = "*quick*:*pre_checkin*"
        commonGroovy.runTestCommand(platform, project, gfilter)
    }

    def packageCommand =
    {
        platform, project->

        commonGroovy.runPackageCommand(platform, project)
    }

    buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, packageCommand)
}

// Pipeline entry point: pick triggers and target nodes based on the top-level job name.
ci: {
    String urlJobName = auxiliary.getTopJobName(env.BUILD_URL)

    // Per-job Jenkins properties; cron '0 1 * * 6' = 01:00 every Saturday, [] = no trigger
    def propertyList = ["compute-rocm-dkms-no-npi":[pipelineTriggers([cron('0 1 * * 6')])],
                        "compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 6')])],
                        "rocm-docker":[]]
    propertyList = auxiliary.appendPropertyList(propertyList)

    // Per-job node selection: OS label -> GPU architectures
    def jobNameList = ["compute-rocm-dkms-no-npi":([ubuntu18:['gfx900'],centos7:['gfx908'],sles15sp1:['gfx906']]),
                       "compute-rocm-dkms-no-npi-hipclang":([ubuntu18:['gfx900'],centos7:['gfx908'],sles15sp1:['gfx906']]),
                       "rocm-docker":([ubuntu18:['gfx900'],centos7:['gfx908'],sles15sp1:['gfx906']])]
    jobNameList = auxiliary.appendJobNameList(jobNameList)

    // Apply the properties of the entry matching the current job, if any
    propertyList.each
    {
        jobName, property->
        if (urlJobName == jobName)
            properties(auxiliary.addCommonProperties(property))
    }

    // Run the stage for the entry matching the current job, if any
    jobNameList.each
    {
        jobName, nodeDetails->
        if (urlJobName == jobName)
stage(jobName) { runCI(nodeDetails, jobName) } } // For url job names that are not listed by the jobNameList i.e. compute-rocm-dkms-no-npi-1901 if(!jobNameList.keySet().contains(urlJobName)) { properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) stage(urlJobName) { runCI([ubuntu18:['gfx906']], urlJobName) } } } rocSPARSE-rocm-5.7.1/.readthedocs.yaml000066400000000000000000000004171447342677400174360ustar00rootroot00000000000000# Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details version: 2 sphinx: configuration: docs/source/conf.py formats: all python: version: "3.8" install: - requirements: docs/source/requirements.txt rocSPARSE-rocm-5.7.1/CHANGELOG.md000066400000000000000000000153661447342677400160310ustar00rootroot00000000000000# Change Log for rocSPARSE Full documentation for rocSPARSE is available at [rocsparse.readthedocs.io](https://rocsparse.readthedocs.io/en/latest/). ## rocSPARSE 2.5.4 for ROCm 5.7.0 ### Added - Added more mixed precisions for SpMV: (matrix: float, vectors: double, calculation: double) and (matrix: rocsparse_float_complex, vectors: rocsparse_double_complex, calculation: rocsparse_double_complex) - Added support for gfx940, gfx941 and gfx942 ### Improved - Fixed a bug in csrsm and bsrsm ### Known Issues - In csritilu0, the algorithm rocsparse_itilu0_alg_sync_split_fusion has some accuracy issues to investigate with XNACK enabled. The fallback is rocsparse_itilu0_alg_sync_split. 
## rocSPARSE 2.5.2 for ROCm 5.6.0 ### Improved - Fixed a memory leak in csritsv - Fixed a bug in csrsm and bsrsm ## rocSPARSE 2.5.1 for ROCm 5.5.0 ### Added - Added bsrgemm and spgemm for BSR format - Added bsrgeam - Added build support for Navi32 - Added experimental hipGraph support for some rocSPARSE routines - Added csritsv and spitsv (CSR iterative triangular solve) - Added mixed precisions for SpMV - Added batched SpMM for transpose A in COO format with atomic algorithm ### Improved - Optimization to csr2bsr - Optimization to csr2csr_compress - Optimization to csr2coo - Optimization to gebsr2csr - Optimization to csr2gebsr - Fixes to documentation - Fixed a bug in COO SpMV gridsize - Fixed a bug in SpMM gridsize when using very large matrices ### Known Issues - In csritilu0, the algorithm rocsparse_itilu0_alg_sync_split_fusion has some accuracy issues to investigate with XNACK enabled. The fallback is rocsparse_itilu0_alg_sync_split. ## rocSPARSE 2.4.0 for ROCm 5.4.0 ### Added - Added rocsparse_spmv_ex routine - Added rocsparse_bsrmv_ex_analysis and rocsparse_bsrmv_ex routines - Added csritilu0 routine - Added build support for Navi31 and Navi33 ### Improved - Optimization to segmented algorithm for COO SpMV by performing analysis - Improved performance when generating random matrices. - Fixed bug in ellmv - Optimized bsr2csr routine - Fixed integer overflow bugs ## rocSPARSE 2.3.2 for ROCm 5.3.0 ### Added - Transpose A for SpMM COO format - Added matrix checker routines for verifying matrix data - Added atomic algorithm for COO SpMV - Added bsrpad routine ### Improved - Fixed a bug in csrilu0 which could cause a deadlock - Fixed a bug where asynchronous memcpy would use the wrong stream - Fixed potential size overflows ## rocSPARSE 2.2.0 for ROCm 5.2.0 ### Added - Batched SpMM for CSR, CSC, and COO formats. - Packages for test and benchmark executables on all supported OSes using CPack. - Clients file importers and exporters. 
### Improved - Clients code size reduction. - Clients error handling. - Clients benchmarking for performance tracking. ### Changed - Test adjustments due to roundoff errors. - Fixing API calls compatibility with rocPRIM. ### Known Issues - none ## rocSPARSE 2.1.0 for ROCm 5.1.0 ### Added - gtsv_interleaved_batch - gpsv_interleaved_batch - SpGEMM_reuse - Allow copying of mat info struct ### Improved - Optimization for SDDMM - Allow unsorted matrices in csrgemm multipass algorithm ### Known Issues - none ## rocSPARSE 2.0.0 for ROCm 5.0.0 ### Added - csrmv, coomv, ellmv, hybmv for (conjugate) transposed matrices - csrmv for symmetric matrices - Packages for test and benchmark executables on all supported OSes using CPack. ### Changed - spmm\_ex is now deprecated and will be removed in the next major release ### Improved - Optimization for gtsv ## rocSPARSE 1.22.2 for ROCm 4.5.0 ### Added - Triangular solve for multiple right-hand sides using BSR format - SpMV for BSRX format - SpMM in CSR format enhanced to work with transposed A - Matrix coloring for CSR matrices - Added batched tridiagonal solve (gtsv\_strided\_batch) - SpMM for BLOCKED ELL format - Generic routines for SpSV and SpSM - Enabling beta support for Windows 10 - Additional atomic based algorithms for SpMM in COO format - Extended version of SpMM - Additional algorithm for SpMM in CSR format - Added (conjugate) transpose support for csrmv and SpMV (CSR) routines ### Changed - Packaging split into a runtime package called rocsparse and a development package called rocsparse-devel. The development package depends on runtime. The runtime package suggests the development package for all supported OSes except CentOS 7 to aid in the transition. The suggests feature in packaging is introduced as a deprecated feature and will be removed in a future rocm release. 
### Improved - Fixed a bug with gemvi on Navi21 - Fixed a bug with adaptive csrmv - Optimization for pivot based gtsv ### Known Issues - none ## rocSPARSE 1.20.2 for ROCm 4.3.0 ### Added - (batched) tridiagonal solver with and without pivoting - dense matrix sparse vector multiplication (gemvi) - support for gfx90a - sampled dense-dense matrix multiplication (sddmm) ### Improved - client matrix download mechanism - boost dependency in clients removed ### Known Issues - none ## rocSPARSE 1.19.5 for ROCm 4.2.0 ### Added - SpMM (CSR, COO) - Code coverage analysis ### Improved - Install script - Level 2/3 unit tests - rocsparse-bench does not depend on boost anymore ### Known Issues - none ## rocSPARSE 1.19.4 for ROCm 4.1.0 ### Added - gebsrmm - gebsrmv - gebsrsv - coo2dense and dense2coo - generic API including axpby, gather, scatter, rot, spvv, spmv, spgemm, sparsetodense, densetosparse - support for mixed indexing types in matrix formats ## rocSPARSE 1.18.4 for ROCm 4.0.0 ### Added - Add changelog - csr2gebsr - gebsr2gebsc - gebsr2gebsr - treating filename as regular expression for yaml-based testing generation. ### Optimized - bsric0 ### Improved - gfx1030 adjustment to the latest compiler. - Replace old xnack off compiler flag with new version. - Updates to debian package name. ### Documentation - gebsr2csr ## rocSPARSE 1.17.6 for ROCm 3.9 ### Added - prune_csr2csr, prune_dense2csr_percentage and prune_csr2csr_percentage added - bsrilu0 added - csrilu0_numeric_boost functionality added ### Known Issues - none ## rocSPARSE 1.16.1 for ROCm 3.8 ### Added - bsric0 added. ### Known Issues - none ## rocSPARSE 1.14.3 for ROCm 3.7 ### Added - Fortran bindings - CentOS 6 support. - Triangular solve for BSR format (bsrsv) - Default compiler switched to hipclang ### Optimized - bsrmv ### Known Issues - none ## rocSPARSE 1.14.3 for ROCm 3.6 ### Added - Fortran bindings - CentOS 6 support. 
- Triangular solve for BSR format (bsrsv) - Default compiler switched to hipclang ### Optimized - bsrmv routine ### Known Issues - none ## rocSPARSE 1.12.10 for ROCm 3.5 ### Added - Switched to hipclang as default compiler - csr2dense, csc2dense, csr2csr_compress, nnz_compress, bsr2csr, csr2bsr, bsrmv, csrgeam - Triangular solve for BSR format (bsrsv) - Options for static build - Examples ### Optimized - dense2csr, dense2csc - installation process. ### Known Issues - none rocSPARSE-rocm-5.7.1/CMakeLists.txt000066400000000000000000000301571447342677400167530ustar00rootroot00000000000000# ######################################################################## # Copyright (C) 2018-2022 Advanced Micro Devices, Inc. All rights Reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. 
#
# ########################################################################

# Top-level build script for the rocSPARSE library: configures the toolchain and build
# options, selects the GPU targets, adds the library (and optionally the clients),
# sets up packaging and, when requested, the code-coverage helper targets.

# The ROCm platform requires at least Ubuntu 16.04 or Fedora 24, which have cmake 3.5
cmake_minimum_required(VERSION 3.5 FATAL_ERROR)

# Consider removing this in the future
# This should appear before the project command, because it does not use FORCE
if(WIN32)
  set(CMAKE_INSTALL_PREFIX "${PROJECT_BINARY_DIR}/package" CACHE PATH "Install path prefix, prepended onto install directories")
else()
  set(CMAKE_INSTALL_PREFIX "/opt/rocm" CACHE PATH "Install path prefix, prepended onto install directories")
endif()

# Adding CMAKE_PREFIX_PATH, needed for static builds
list( APPEND CMAKE_PREFIX_PATH /opt/rocm/llvm /opt/rocm )

# CMake modules
list(APPEND CMAKE_MODULE_PATH
     ${CMAKE_CURRENT_SOURCE_DIR}/cmake
     ${ROCM_PATH}/lib/cmake/hip
     /opt/rocm/lib/cmake/hip
     /opt/rocm/hip/cmake)

# Set a default build type if none was specified
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
  message(STATUS "Setting build type to 'Release' as none was specified.")
  set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build." FORCE)
  set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "" "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
endif()

# Honor per-config flags in try_compile() source-file signature. cmake v3.7 and up
if(POLICY CMP0066)
  cmake_policy(SET CMP0066 NEW)
endif()

# rocSPARSE project
project(rocsparse LANGUAGES CXX Fortran C)

# Determine if CXX Compiler is hip-clang
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
  message(STATUS "Using hip-clang to build for amdgpu backend")
else()
  message(FATAL_ERROR "'hipcc' compiler required to compile for ROCm platform.")
endif()

# Build flags
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Build options
option(BUILD_SHARED_LIBS "Build rocSPARSE as a shared library" ON)
option(BUILD_CLIENTS_TESTS "Build tests (requires googletest)" OFF)
option(BUILD_CLIENTS_BENCHMARKS "Build benchmarks" OFF)
option(BUILD_CLIENTS_SAMPLES "Build examples" OFF)
option(BUILD_VERBOSE "Output additional build information" OFF)
option(BUILD_CODE_COVERAGE "Build rocSPARSE with code coverage enabled" OFF)
option(BUILD_ADDRESS_SANITIZER "Build rocSPARSE with address sanitizer enabled" OFF)
option(BUILD_MEMSTAT "Build rocSPARSE with memory statistics enabled" OFF)

#
if(BUILD_CODE_COVERAGE)
  add_compile_options(-fprofile-arcs -ftest-coverage)
  add_link_options(--coverage -lgcov)
endif()

if(BUILD_ADDRESS_SANITIZER)

  # CXX
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -shared-libasan")
  set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -fuse-ld=lld")

  # C
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -shared-libasan")
  set(CMAKE_C_LINK_EXECUTABLE "${CMAKE_C_LINK_EXECUTABLE} -fuse-ld=lld")

  # Fortran excluded (see NOTES below)
  # set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fsanitize=address")
  # set(CMAKE_Fortran_LINK_EXECUTABLE "${CMAKE_Fortran_LINK_EXECUTABLE} -fuse-ld=bfd")

  #
  # NOTES ON FORTRAN:
  # Fortran samples can compile with the options above:
  # > -shared-libasan and -fuse-ld=lld is not always recognized from gfortran
  # > -fuse-ld=bfd is here to mimic the required -fuse-ld=lld, but it compiles without too.
  # However, error from asan is popping up from running a Fortran sample:
  # ==14708==AddressSanitizer CHECK failed: /src/external/llvm-project/compiler-rt/lib/asan/../sanitizer_common/sanitizer_common_interceptors.inc:10057 "((__interception::real_memcpy)) != (0)" (0x0, 0x0)
  #
  # !!!!
  # For this reason, and to be consistent with other libraries, Fortran is excluded from the address sanitizer build.
  # !!!!
  #

endif()

# Dependencies
include(cmake/Dependencies.cmake)

# FOR HANDLING ENABLE/DISABLE OPTIONAL BACKWARD COMPATIBILITY for FILE/FOLDER REORG
option(BUILD_FILE_REORG_BACKWARD_COMPATIBILITY "Build with file/folder reorg with backward compatibility enabled" ON)
if(BUILD_FILE_REORG_BACKWARD_COMPATIBILITY AND NOT WIN32)
  rocm_wrap_header_dir(
    ${CMAKE_SOURCE_DIR}/library/include
    PATTERNS "*.h"
    GUARDS SYMLINK WRAPPER
    WRAPPER_LOCATIONS ${CMAKE_INSTALL_INCLUDEDIR}
  )
endif()

# Detect compiler support for target ID
# This section is deprecated. Please use rocm_check_target_ids for future use.
# TARGET_ID_SUPPORT is only consulted by the fallback branch below, i.e. when the
# rocm_check_target_ids command is not available. The detection runs once; it used
# to be duplicated, which executed "hipcc --help" twice with identical results.
if(CMAKE_CXX_COMPILER MATCHES ".*/hipcc$")
  execute_process(COMMAND ${CMAKE_CXX_COMPILER} "--help"
    OUTPUT_VARIABLE CXX_OUTPUT
    OUTPUT_STRIP_TRAILING_WHITESPACE
    ERROR_STRIP_TRAILING_WHITESPACE)
  # The input is quoted on purpose: string(REGEX MATCH) requires at least one input
  # argument, and an unquoted empty ${CXX_OUTPUT} (e.g. the compiler printed nothing)
  # would expand to no argument at all and abort the configure step.
  string(REGEX MATCH ".mcode\-object\-version" TARGET_ID_SUPPORT "${CXX_OUTPUT}")
endif()

#Set the AMDGPU_TARGETS with backward compatibility
if(COMMAND rocm_check_target_ids)
  rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS
    TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102"
  )
else()
  # Use target ID syntax if supported for AMDGPU_TARGETS
  if(TARGET_ID_SUPPORT)
    set(DEFAULT_AMDGPU_TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx1030;gfx1100;gfx1101;gfx1102")
  else()
    set(DEFAULT_AMDGPU_TARGETS "gfx803;gfx900;gfx906;gfx908")
  endif()
endif()

# A user-supplied AMDGPU_TARGETS wins; the special value "all" selects the defaults.
if (AMDGPU_TARGETS)
  set(TMPAMDGPU_TARGETS "${AMDGPU_TARGETS}")
  if(TMPAMDGPU_TARGETS STREQUAL "all" )
    set(AMDGPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "List of specific machine types for library to target" FORCE)
  else()
    set(AMDGPU_TARGETS "${TMPAMDGPU_TARGETS}" CACHE STRING "AMD GPU targets to compile for" FORCE)
  endif()
else()
  set(AMDGPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "List of specific machine types for library to target")
endif()

message(STATUS "AMDGPU_TARGETS: ${AMDGPU_TARGETS}")

# Find rocprim package
find_package(rocprim REQUIRED)

if( CMAKE_CXX_COMPILER_ID MATCHES "Clang" )
  find_package( hip REQUIRED CONFIG PATHS ${HIP_DIR} ${ROCM_PATH} /opt/rocm )
endif( )

# Setup version
set(VERSION_STRING "2.5.4")
rocm_setup_version(VERSION ${VERSION_STRING})
set(rocsparse_SOVERSION 0.1)

if( BUILD_CLIENTS_SAMPLES OR BUILD_CLIENTS_TESTS OR BUILD_CLIENTS_BENCHMARKS )
  set( BUILD_CLIENTS ON )
  if(NOT CLIENTS_OS)
    rocm_set_os_id(CLIENTS_OS)
    string(TOLOWER "${CLIENTS_OS}" CLIENTS_OS)
    rocm_read_os_release(CLIENTS_OS_VERSION VERSION_ID)
  endif()

  # Package names of the OpenMP / gfortran runtimes differ per distribution;
  # they are added as dependencies of the client packages below.
  find_package(OpenMP QUIET)
  if(OPENMP_FOUND)
    set(OPENMP_RPM "libgomp")
    set(OPENMP_DEB "libomp-dev")
  endif()
  set(GFORTRAN_RPM "libgfortran4")
  set(GFORTRAN_DEB "libgfortran4")
  if(CLIENTS_OS STREQUAL "centos" OR CLIENTS_OS STREQUAL "rhel")
    if(CLIENTS_OS_VERSION VERSION_GREATER_EQUAL "8")
      set(GFORTRAN_RPM "libgfortran")
    endif()
  elseif(CLIENTS_OS STREQUAL "ubuntu" AND CLIENTS_OS_VERSION VERSION_GREATER_EQUAL "20.04")
    set(GFORTRAN_DEB "libgfortran5")
  elseif(CLIENTS_OS STREQUAL "sles" AND OPENMP_FOUND)
    set(OPENMP_RPM "libgomp1")
  endif()

  rocm_package_setup_component(clients)
  rocm_package_setup_client_component(clients-common)
  if( BUILD_CLIENTS_TESTS )
    rocm_package_setup_client_component(
      tests
      DEPENDS
        COMPONENT clients-common
        DEB "${OPENMP_DEB}"
        RPM "${OPENMP_RPM}"
    )
  endif()
  if( BUILD_CLIENTS_BENCHMARKS )
    rocm_package_setup_client_component(
      benchmarks
      DEPENDS
        COMPONENT clients-common
        DEB "${OPENMP_DEB}"
        RPM "${OPENMP_RPM}"
    )
  endif()
  if(BUILD_FORTRAN_CLIENTS)
    rocm_package_add_rpm_dependencies(COMPONENT tests DEPENDS "${GFORTRAN_RPM}")
    rocm_package_add_deb_dependencies(COMPONENT tests DEPENDS "${GFORTRAN_DEB}")
    rocm_package_add_rpm_dependencies(COMPONENT benchmarks DEPENDS "${GFORTRAN_RPM}")
    rocm_package_add_deb_dependencies(COMPONENT benchmarks DEPENDS "${GFORTRAN_DEB}")
  endif()
endif()

# rocSPARSE library
add_subdirectory(library)

# Trigger client builds if selected
if(BUILD_CLIENTS)
  add_subdirectory(clients)
endif()

# Package specific CPACK vars
rocm_package_add_dependencies(DEPENDS "hip-rocclr >= 3.5.0")

set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.md")
set(CPACK_RPM_PACKAGE_LICENSE "MIT")

if(WIN32)
  set(CPACK_SOURCE_GENERATOR "ZIP")
  set(CPACK_GENERATOR "ZIP")
  set(CMAKE_INSTALL_PREFIX "C:/hipSDK" CACHE PATH "Install path" FORCE)
  set(INSTALL_PREFIX "C:/hipSDK")
  set(CPACK_SET_DESTDIR OFF)
  set(CPACK_PACKAGE_INSTALL_DIRECTORY "C:/hipSDK")
  set(CPACK_PACKAGING_INSTALL_PREFIX "")
  set(CPACK_INCLUDE_TOPLEVEL_DIRECTORY OFF)
else()
  if(NOT CPACK_PACKAGING_INSTALL_PREFIX)
    set(CPACK_PACKAGING_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}")
  endif()
endif()

set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "\${CPACK_PACKAGING_INSTALL_PREFIX}" )

# Package name
set(package_name rocsparse)

set(ROCSPARSE_CONFIG_DIR "\${CPACK_PACKAGING_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}" CACHE PATH "Path placed into ldconfig file")

rocm_create_package(
  NAME ${package_name}
  DESCRIPTION "Radeon Open Compute SPARSE library"
  MAINTAINER "rocSPARSE Maintainer "
  LDCONFIG
  LDCONFIG_DIR ${ROCSPARSE_CONFIG_DIR}
)

#
# ADDITIONAL TARGETS FOR CODE COVERAGE
#
if(BUILD_CODE_COVERAGE)
  #
  # > make coverage_cleanup (clean coverage related files.)
  # > make coverage GTEST_FILTER=<>
  # will run:
  #  > make coverage_analysis GTEST_FILTER=<> (analyze tests)
  #  > make coverage_output (generate html documentation)
  #

  #
  # Run coverage analysis
  #
  add_custom_target(coverage_analysis
    COMMAND echo Coverage GTEST_FILTER=\${GTEST_FILTER}
    COMMAND ./clients/staging/rocsparse-test --gtest_filter=\"\${GTEST_FILTER}\"
    WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
  )

  add_dependencies(coverage_analysis rocsparse)

  #
  # Prepare coverage output
  # This little script is generated because the option '--gcov-tool ' of lcov cannot take arguments.
  #
  add_custom_target(coverage_output
    DEPENDS coverage_analysis
    COMMAND mkdir -p lcoverage
    COMMAND echo "\\#!/bin/bash" > llvm-gcov.sh
    COMMAND echo "\\# THIS FILE HAS BEEN GENERATED" >> llvm-gcov.sh
    COMMAND printf "exec /opt/rocm/llvm/bin/llvm-cov gcov $$\\@" >> llvm-gcov.sh
    COMMAND chmod +x llvm-gcov.sh
  )

  #
  # Generate coverage output.
  #
  add_custom_command(TARGET coverage_output
    COMMAND lcov --directory . --base-directory . --gcov-tool ${CMAKE_BINARY_DIR}/llvm-gcov.sh --capture -o lcoverage/raw_main_coverage.info
    COMMAND lcov --remove lcoverage/raw_main_coverage.info "'/opt/*'" "'/usr/*'" -o lcoverage/main_coverage.info
    COMMAND genhtml lcoverage/main_coverage.info --output-directory lcoverage
  )

  add_custom_target(coverage DEPENDS coverage_output)

  #
  # Coverage cleanup
  #
  add_custom_target(coverage_cleanup
    COMMAND find ${CMAKE_BINARY_DIR} -name *.gcda -delete
    WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
  )
endif()
rocSPARSE-rocm-5.7.1/LICENSE.md000066400000000000000000000021121447342677400156050ustar00rootroot00000000000000Copyright (C) 2018-2020 Advanced Micro Devices, Inc. All rights Reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
rocSPARSE-rocm-5.7.1/README.md000066400000000000000000000060131447342677400154640ustar00rootroot00000000000000# rocSPARSE rocSPARSE exposes a common interface that provides Basic Linear Algebra Subroutines for sparse computation implemented on top of AMD's Radeon Open eCosystem Platform [ROCm][] runtime and toolchains. rocSPARSE is created using the [HIP][] programming language and optimized for AMD's latest discrete GPUs. ## Documentation The latest rocSPARSE documentation and API description can be found [here][] or downloaded as [pdf][]. ## Requirements * Git * CMake (3.5 or later) * AMD [ROCm] 3.5 platform or later Optional: * [GTest][] * Required for tests. * Use GTEST_ROOT to specify GTest location. * If [GTest][] is not found, it will be downloaded and built automatically. ## Quickstart rocSPARSE build and install #### Install script You can build rocSPARSE using the *install.sh* script ``` # Clone rocSPARSE using git git clone https://github.com/ROCmSoftwarePlatform/rocSPARSE.git # Go to rocSPARSE directory cd rocSPARSE # Run install.sh script # Command line options: # -h|--help - prints help message # -i|--install - install after build # -d|--dependencies - install build dependencies # -c|--clients - build library clients too (combines with -i & -d) # -g|--debug - build with debug flag ./install.sh -dci ``` #### CMake All compiler specifications are determined automatically. 
The compilation process can be performed as follows: ``` # Clone rocSPARSE using git git clone https://github.com/ROCmSoftwarePlatform/rocSPARSE.git # Go to rocSPARSE directory, create and go to the build directory cd rocSPARSE; mkdir -p build/release; cd build/release # Configure rocSPARSE # Build options: # BUILD_CLIENTS_TESTS - build tests (OFF) # BUILD_CLIENTS_BENCHMARKS - build benchmarks (OFF) # BUILD_CLIENTS_SAMPLES - build examples (OFF) # BUILD_VERBOSE - verbose output (OFF) # BUILD_SHARED_LIBS - build rocSPARSE as a shared library (ON) CXX=/opt/rocm/bin/hipcc cmake -DBUILD_CLIENTS_TESTS=ON ../.. # Build make # Install [sudo] make install ``` ## Unit tests To run unit tests, rocSPARSE has to be built with option -DBUILD_CLIENTS_TESTS=ON. ``` # Go to rocSPARSE build directory cd rocSPARSE/build/release # Run all tests ./clients/staging/rocsparse-test ``` ## Benchmarks To run benchmarks, rocSPARSE has to be built with option -DBUILD_CLIENTS_BENCHMARKS=ON. ``` # Go to rocSPARSE build directory cd rocSPARSE/build/release # Run benchmark, e.g. ./clients/staging/rocsparse-bench -f hybmv --laplacian-dim 2000 -i 200 ``` ## Support Please use [the issue tracker][] for bugs and feature requests. ## License The [license file][] can be found in the main repository. 
[ROCm]: https://github.com/RadeonOpenCompute/ROCm [HIP]: https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/ [GTest]: https://github.com/google/googletest [the issue tracker]: https://github.com/ROCmSoftwarePlatform/rocSPARSE/issues [license file]: https://github.com/ROCmSoftwarePlatform/rocSPARSE [here]: https://rocsparse.readthedocs.io [pdf]: https://rocsparse.readthedocs.io/_/downloads/en/master/pdf/ rocSPARSE-rocm-5.7.1/clients/000077500000000000000000000000001447342677400156465ustar00rootroot00000000000000rocSPARSE-rocm-5.7.1/clients/CMakeLists.txt000066400000000000000000000126561447342677400204200ustar00rootroot00000000000000# ######################################################################## # Copyright (C) 2018-2022 Advanced Micro Devices, Inc. All rights Reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. 
#
# ########################################################################

# Build script for the rocSPARSE clients (samples, benchmarks, tests). It is entered
# via add_subdirectory() from the library project, and the "if(NOT TARGET rocsparse)"
# branch below additionally lets it locate an already installed rocsparse package.

# The ROCm platform requires Ubuntu 16.04 or Fedora 24, which has cmake 3.5
cmake_minimum_required(VERSION 3.5 FATAL_ERROR)

# Consider removing this in the future
# This should appear before the project command, because it does not use FORCE
if(WIN32)
  set(CMAKE_INSTALL_PREFIX "${PROJECT_BINARY_DIR}/package" CACHE PATH "Install path prefix, prepended onto install directories")
else()
  set(CMAKE_INSTALL_PREFIX "/opt/rocm" CACHE PATH "Install path prefix, prepended onto install directories")
endif()

# This has to be initialized before the project() command appears
# Set the default of CMAKE_BUILD_TYPE to be release, unless user specifies with -D. MSVC_IDE does not use CMAKE_BUILD_TYPE
if(NOT DEFINED CMAKE_CONFIGURATION_TYPES AND NOT DEFINED CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel.")
endif()

# This project may compile dependencies for clients
project(rocsparse-clients LANGUAGES CXX)

# Determine if CXX Compiler is hip-clang
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
  message(STATUS "Using hip-clang to build for amdgpu backend")
else()
  message(FATAL_ERROR "'hipcc' compiler required to compile for ROCm platform.")
endif()

list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)

# This option only works for make/nmake and the ninja generators, but no reason it shouldn't be on all the time
# This tells cmake to create a compile_commands.json file that can be used with clang tooling or vim
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# No rocsparse target in this build tree: fall back to an installed rocsparse package
# and declare the client options here. Note that BUILD_CLIENTS_SAMPLES defaults to ON
# in this branch.
if(NOT TARGET rocsparse)
  find_package(rocsparse REQUIRED CONFIG PATHS /opt/rocm/rocsparse)

  option(BUILD_CLIENTS_TESTS "Build tests (requires googletest)" OFF)
  option(BUILD_CLIENTS_BENCHMARKS "Build benchmarks" OFF)
  option(BUILD_CLIENTS_SAMPLES "Build examples" ON)
endif()

# HIP headers required of all clients; clients use hip to allocate device memory
list(APPEND CMAKE_PREFIX_PATH /opt/rocm)
find_package(HIP REQUIRED CONFIG PATHS ${CMAKE_PREFIX_PATH})

# Build flags
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# If OpenMP is available, we can use it to speed up some tests
find_package(OpenMP QUIET)

if(OPENMP_FOUND)
  if(NOT TARGET OpenMP::OpenMP_CXX)
    # OpenMP cmake fix for cmake <= 3.9
    # Creates the imported target by hand from OpenMP_CXX_FLAGS when FindOpenMP did not.
    # NOTE(review): Threads::Threads is referenced below, but no find_package(Threads)
    # is visible in this file - confirm it is provided elsewhere.
    add_library(OpenMP::OpenMP_CXX IMPORTED INTERFACE)
    set_property(TARGET OpenMP::OpenMP_CXX PROPERTY INTERFACE_COMPILE_OPTIONS ${OpenMP_CXX_FLAGS})
    set_property(TARGET OpenMP::OpenMP_CXX PROPERTY INTERFACE_LINK_LIBRARIES ${OpenMP_CXX_FLAGS} Threads::Threads)
  endif()
endif()

# Add the selected client sub-projects
if(BUILD_CLIENTS_SAMPLES)
  add_subdirectory(samples)
endif()

if(BUILD_CLIENTS_BENCHMARKS)
  add_subdirectory(benchmarks)
endif()

if(BUILD_CLIENTS_TESTS)
  enable_testing()
  add_subdirectory(tests)
endif()

# Copy the shared YAML files and the test generator script from the source tree into
# <build>/staging. Each copy is a custom command keyed on its output file, so it is
# re-run only when the corresponding source file changes.
set(ROCSPARSE_COMMON "${PROJECT_BINARY_DIR}/staging/rocsparse_common.yaml")
add_custom_command(OUTPUT "${ROCSPARSE_COMMON}"
                   COMMAND ${CMAKE_COMMAND} -E copy include/rocsparse_common.yaml "${ROCSPARSE_COMMON}"
                   DEPENDS include/rocsparse_common.yaml
                   WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}")

set(ROCSPARSE_TEMPLATE "${PROJECT_BINARY_DIR}/staging/rocsparse_template.yaml")
add_custom_command(OUTPUT "${ROCSPARSE_TEMPLATE}"
                   COMMAND ${CMAKE_COMMAND} -E copy include/rocsparse_template.yaml "${ROCSPARSE_TEMPLATE}"
                   DEPENDS include/rocsparse_template.yaml
                   WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}")

set(ROCSPARSE_GENTEST "${PROJECT_BINARY_DIR}/staging/rocsparse_gentest.py")
add_custom_command(OUTPUT "${ROCSPARSE_GENTEST}"
                   COMMAND ${CMAKE_COMMAND} -E copy common/rocsparse_gentest.py "${ROCSPARSE_GENTEST}"
                   DEPENDS common/rocsparse_gentest.py
                   WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}")

# Umbrella target that triggers the three copy commands above
add_custom_target(rocsparse-common DEPENDS "${ROCSPARSE_COMMON}" "${ROCSPARSE_TEMPLATE}" "${ROCSPARSE_GENTEST}")

# Install the staged files with the clients-common component; the generator script is
# installed via PROGRAMS (rather than FILES) so that it is installed as an executable.
rocm_install(
  FILES "${ROCSPARSE_COMMON}" "${ROCSPARSE_TEMPLATE}"
  COMPONENT clients-common
  DESTINATION ${CMAKE_INSTALL_BINDIR}
)

rocm_install(
  PROGRAMS "${ROCSPARSE_GENTEST}"
  COMPONENT clients-common
  DESTINATION ${CMAKE_INSTALL_BINDIR}
)
rocSPARSE-rocm-5.7.1/clients/benchmarks/000077500000000000000000000000001447342677400177635ustar00rootroot00000000000000rocSPARSE-rocm-5.7.1/clients/benchmarks/CMakeLists.txt000066400000000000000000000165501447342677400225320ustar00rootroot00000000000000# ######################################################################## # Copyright (C) 2018-2023 Advanced Micro Devices, Inc. All rights Reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # # ######################################################################## find_package(rocsparseio QUIET) # MEMSTAT ? 
if(BUILD_MEMSTAT) add_compile_options(-DROCSPARSE_WITH_MEMSTAT) endif() set(ROCSPARSE_BENCHMARK_SOURCES client.cpp rocsparse_bench_app.cpp rocsparse_arguments_config.cpp rocsparse_bench.cpp rocsparse_bench_cmdlines.cpp rocsparse_routine.cpp ) set(ROCSPARSE_CLIENTS_COMMON ../common/utility.cpp ../common/rocsparse_check.cpp ../common/rocsparse_enum.cpp ../common/rocsparse_init.cpp ../common/rocsparse_host.cpp ../common/rocsparse_matrix_factory.cpp ../common/rocsparse_matrix_factory_laplace2d.cpp ../common/rocsparse_matrix_factory_laplace3d.cpp ../common/rocsparse_matrix_factory_zero.cpp ../common/rocsparse_matrix_factory_random.cpp ../common/rocsparse_matrix_factory_tridiagonal.cpp ../common/rocsparse_matrix_factory_pentadiagonal.cpp ../common/rocsparse_matrix_factory_file.cpp ../common/rocsparse_exporter_rocsparseio.cpp ../common/rocsparse_exporter_rocalution.cpp ../common/rocsparse_exporter_matrixmarket.cpp ../common/rocsparse_exporter_ascii.cpp ../common/rocsparse_importer.cpp ../common/rocsparse_importer_rocalution.cpp ../common/rocsparse_importer_rocsparseio.cpp ../common/rocsparse_importer_matrixmarket.cpp ../common/rocsparse_clients_envariables.cpp ) set(ROCSPARSE_CLIENTS_TESTINGS ../testings/testing_axpyi.cpp ../testings/testing_doti.cpp ../testings/testing_dotci.cpp ../testings/testing_gthr.cpp ../testings/testing_gthrz.cpp ../testings/testing_roti.cpp ../testings/testing_sctr.cpp ../testings/testing_bsrmv.cpp ../testings/testing_bsrxmv.cpp ../testings/testing_bsrsv.cpp ../testings/testing_coomv.cpp ../testings/testing_csrmv.cpp ../testings/testing_csrmv_managed.cpp ../testings/testing_csrsv.cpp ../testings/testing_csritsv.cpp ../testings/testing_ellmv.cpp ../testings/testing_hybmv.cpp ../testings/testing_gebsrmv.cpp ../testings/testing_bsrmm.cpp ../testings/testing_gebsrmm.cpp ../testings/testing_csrmm.cpp ../testings/testing_spmm_csr.cpp ../testings/testing_spmm_csc.cpp ../testings/testing_spmm_coo.cpp ../testings/testing_spmm_bell.cpp 
../testings/testing_spmm_batched_csr.cpp ../testings/testing_spmm_batched_csc.cpp ../testings/testing_spmm_batched_coo.cpp ../testings/testing_spmm_batched_bell.cpp ../testings/testing_csrsm.cpp ../testings/testing_bsrsm.cpp ../testings/testing_gemmi.cpp ../testings/testing_bsrgemm.cpp ../testings/testing_bsrgeam.cpp ../testings/testing_csrgeam.cpp ../testings/testing_csrgemm.cpp ../testings/testing_csrgemm_reuse.cpp ../testings/testing_bsric0.cpp ../testings/testing_bsrilu0.cpp ../testings/testing_csric0.cpp ../testings/testing_csrilu0.cpp ../testings/testing_csritilu0.cpp ../testings/testing_gpsv_interleaved_batch.cpp ../testings/testing_gtsv.cpp ../testings/testing_gtsv_no_pivot.cpp ../testings/testing_gtsv_no_pivot_strided_batch.cpp ../testings/testing_gtsv_interleaved_batch.cpp ../testings/testing_csr2coo.cpp ../testings/testing_csr2csc.cpp ../testings/testing_gebsr2gebsc.cpp ../testings/testing_gebsr2gebsr.cpp ../testings/testing_csr2ell.cpp ../testings/testing_csr2hyb.cpp ../testings/testing_csr2bsr.cpp ../testings/testing_csr2gebsr.cpp ../testings/testing_coo2csr.cpp ../testings/testing_ell2csr.cpp ../testings/testing_hyb2csr.cpp ../testings/testing_bsr2csr.cpp ../testings/testing_gebsr2csr.cpp ../testings/testing_csr2csr_compress.cpp ../testings/testing_prune_csr2csr.cpp ../testings/testing_prune_csr2csr_by_percentage.cpp ../testings/testing_identity.cpp ../testings/testing_csrsort.cpp ../testings/testing_cscsort.cpp ../testings/testing_coosort.cpp ../testings/testing_csricsv.cpp ../testings/testing_csrilusv.cpp ../testings/testing_nnz.cpp ../testings/testing_dense2csr.cpp ../testings/testing_dense2coo.cpp ../testings/testing_prune_dense2csr.cpp ../testings/testing_prune_dense2csr_by_percentage.cpp ../testings/testing_dense2csc.cpp ../testings/testing_csr2dense.cpp ../testings/testing_csc2dense.cpp ../testings/testing_coo2dense.cpp ../testings/testing_spmv_bsr.cpp ../testings/testing_spmv_coo.cpp ../testings/testing_spmv_coo_aos.cpp 
../testings/testing_spmv_csr.cpp ../testings/testing_spmv_csc.cpp ../testings/testing_spmv_ell.cpp ../testings/testing_spsv_csr.cpp ../testings/testing_spsv_coo.cpp ../testings/testing_spitsv_csr.cpp ../testings/testing_spsm_csr.cpp ../testings/testing_spsm_coo.cpp ../testings/testing_sparse_to_dense_coo.cpp ../testings/testing_sparse_to_dense_csr.cpp ../testings/testing_sparse_to_dense_csc.cpp ../testings/testing_dense_to_sparse_coo.cpp ../testings/testing_dense_to_sparse_csr.cpp ../testings/testing_dense_to_sparse_csc.cpp ../testings/testing_spgemm_bsr.cpp ../testings/testing_spgemm_csr.cpp ../testings/testing_gemvi.cpp ../testings/testing_sddmm.cpp ../testings/testing_csrcolor.cpp ../testings/testing_check_matrix_csr.cpp ../testings/testing_check_matrix_csc.cpp ../testings/testing_check_matrix_coo.cpp ../testings/testing_check_matrix_gebsr.cpp ../testings/testing_check_matrix_gebsc.cpp ../testings/testing_check_matrix_ell.cpp ../testings/testing_check_matrix_hyb.cpp ../testings/testing_bsrpad_value.cpp ) add_executable(rocsparse-bench ${ROCSPARSE_BENCHMARK_SOURCES} ${ROCSPARSE_CLIENTS_COMMON} ${ROCSPARSE_CLIENTS_TESTINGS}) # Target compile options target_compile_options(rocsparse-bench PRIVATE -Wno-unused-command-line-argument -Wall) if (rocsparseio_FOUND) message("Compile with ROCSPARSEIO") target_compile_options(rocsparse-bench PRIVATE -DROCSPARSEIO) endif() # Internal common header target_include_directories(rocsparse-bench PRIVATE $) # Target link libraries target_link_libraries(rocsparse-bench PRIVATE roc::rocsparse hip::host hip::device) if (rocsparseio_FOUND) target_link_libraries(rocsparse-bench PRIVATE roc::rocsparseio) endif() # Add OpenMP if available if(OPENMP_FOUND) if (NOT WIN32) target_link_libraries(rocsparse-bench PRIVATE OpenMP::OpenMP_CXX -Wl,-rpath=${HIP_CLANG_ROOT}/lib) else() target_link_libraries(rocsparse-bench PRIVATE libomp) endif() endif() # Set benchmark output directory set_target_properties(rocsparse-bench PROPERTIES 
RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging") rocm_install(TARGETS rocsparse-bench COMPONENT benchmarks) rocSPARSE-rocm-5.7.1/clients/benchmarks/client.cpp000066400000000000000000000101071447342677400217440ustar00rootroot00000000000000/*! \file */ /* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
* * ************************************************************************ */ #include "rocsparse.hpp" #include "rocsparse_bench.hpp" #include "rocsparse_routine.hpp" #include "utility.hpp" #include #include #include "rocsparse_bench_app.hpp" // // REQUIRED ROUTINES: // - rocsparse_record_timing // - rocsparse_record_output // - rocsparse_record_output_legend // - display_timing_info_is_stdout_disabled // rocsparse_status rocsparse_record_output_legend(const std::string& s) { auto* s_bench_app = rocsparse_bench_app::instance(); if(s_bench_app) { auto status = s_bench_app->record_output_legend(s); return status; } else { return rocsparse_status_success; } } rocsparse_status rocsparse_record_output(const std::string& s) { auto* s_bench_app = rocsparse_bench_app::instance(); if(s_bench_app) { auto status = s_bench_app->record_output(s); return status; } else { return rocsparse_status_success; } } rocsparse_status rocsparse_record_timing(double msec, double gflops, double gbs) { auto* s_bench_app = rocsparse_bench_app::instance(); if(s_bench_app) { return s_bench_app->record_timing(msec, gflops, gbs); } else { return rocsparse_status_success; } } bool display_timing_info_is_stdout_disabled() { auto* s_bench_app = rocsparse_bench_app::instance(); if(s_bench_app) { return s_bench_app->is_stdout_disabled(); } else { return false; } } int main(int argc, char* argv[]) { if(rocsparse_bench_app::applies(argc, argv)) { try { auto* s_bench_app = rocsparse_bench_app::instance(argc, argv); // // RUN CASES. // rocsparse_status status = s_bench_app->run_cases(); if(status != rocsparse_status_success) { return status; } // // EXPORT FILE. // status = s_bench_app->export_file(); if(status != rocsparse_status_success) { return status; } return status; } catch(const rocsparse_status& status) { return status; } } else { // // old style. // try { rocsparse_bench bench(argc, argv); // // Print info devices. // bench.info_devices(std::cout); // // Run benchmark. 
// rocsparse_status status = bench.run(); if(status != rocsparse_status_success) { return status; } return status; } catch(const rocsparse_status& status) { return status; } } } rocSPARSE-rocm-5.7.1/clients/benchmarks/program_options.hpp000066400000000000000000000303361447342677400237230ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2021 Advanced Micro Devices, Inc. All rights Reserved. * ************************************************************************ */ // This emulates the required functionality of boost::program_options #pragma once #include #include #include #include #include #include #include #include #include #include // Regular expression for token delimiters (whitespace and commas) static const std::regex program_options_regex{"[, \\f\\n\\r\\t\\v]+", std::regex_constants::optimize}; // variables_map is a set of seen options using variables_map = std::set; // Polymorphic base class to use with dynamic_cast class value_base { protected: bool m_has_default = false; public: bool has_default() const { return m_has_default; } virtual ~value_base() = default; }; // Value parameters template class value : public value_base { T* m_var; // Pointer to variable to be modified public: // Constructor explicit value(T* var) : m_var(var) { } // Pointer to variable T* get_ptr() const { return m_var; } // Allows default_value() value* operator->() { return this; } // Set default value value& default_value(T val) { *m_var = std::move(val); m_has_default = true; return *this; } }; // bool_switch is a value, which is handled specially using bool_switch = value; class options_description { // desc_option describes a particular option class desc_option { std::string m_opts; value_base* m_val; std::string m_desc; public: desc_option& operator=(const desc_option&) = delete; // Constructor with options, value and description template desc_option(std::string opts, value val, std::string desc) : 
m_opts(std::move(opts)) , m_val(new auto(std::move(val))) , m_desc(std::move(desc)) { } // Constructor with options and description desc_option(std::string opts, std::string desc) : m_opts(std::move(opts)) , m_val(nullptr) , m_desc(std::move(desc)) { } // Copy constructor is deleted desc_option(const desc_option&) = delete; // Move constructor desc_option(desc_option&& other) : m_opts(std::move(other.m_opts)) , m_val(other.m_val) , m_desc(std::move(other.m_desc)) { other.m_val = nullptr; } // Destructor ~desc_option() { delete m_val; } // Accessors const std::string& get_opts() const { return m_opts; } const value_base* get_val() const { return m_val; } const std::string& get_desc() const { return m_desc; } // Set a value void set_val(int& argc, char**& argv) const { // We test all supported types with dynamic_cast and parse accordingly bool match = false; if(dynamic_cast*>(m_val)) { auto* val = dynamic_cast*>(m_val)->get_ptr(); match = argc && sscanf(*argv, "%" SCNd32, val) == 1; } else if(dynamic_cast*>(m_val)) { auto* val = dynamic_cast*>(m_val)->get_ptr(); match = argc && sscanf(*argv, "%" SCNu32, val) == 1; } else if(dynamic_cast*>(m_val)) { auto* val = dynamic_cast*>(m_val)->get_ptr(); match = argc && sscanf(*argv, "%" SCNd64, val) == 1; } else if(dynamic_cast*>(m_val)) { auto* val = dynamic_cast*>(m_val)->get_ptr(); match = argc && sscanf(*argv, "%" SCNu64, val) == 1; } else if(dynamic_cast*>(m_val)) { auto* val = dynamic_cast*>(m_val)->get_ptr(); match = argc && sscanf(*argv, "%f", val) == 1; } else if(dynamic_cast*>(m_val)) { auto* val = dynamic_cast*>(m_val)->get_ptr(); match = argc && sscanf(*argv, "%lf", val) == 1; } else if(dynamic_cast*>(m_val)) { auto* val = dynamic_cast*>(m_val)->get_ptr(); match = argc && sscanf(*argv, " %c", val) == 1; } else if(dynamic_cast*>(m_val)) { // We handle bool specially, setting the value to true without argument auto* val = dynamic_cast*>(m_val)->get_ptr(); *val = true; return; } else if(dynamic_cast*>(m_val)) { 
if(argc) { *dynamic_cast*>(m_val)->get_ptr() = *argv; match = true; } } else { throw std::logic_error("Internal error: Unsupported data type"); } if(!match) throw std::invalid_argument(argc ? *argv : "Missing required argument"); // Skip past the argument's value ++argv; --argc; } }; // Description and option list std::string m_desc; std::vector m_optlist; // desc_optionlist allows chains of options to be parenthesized class desc_optionlist { std::vector& m_list; public: explicit desc_optionlist(std::vector& list) : m_list(list) { } template desc_optionlist operator()(Ts&&... arg) { m_list.push_back(desc_option(std::forward(arg)...)); return *this; } }; public: // Constructor explicit options_description(std::string desc) : m_desc(std::move(desc)) { } // Start a desc_optionlist chain desc_optionlist add_options() & { return desc_optionlist(m_optlist); } // Parse an option at the current (argc, argv) position void parse_option(int& argc, char**& argv, variables_map& vm, bool ignoreUnknown = false) const { // Iterate across all options for(const auto& opt : m_optlist) { // Canonical name used for map std::string canonical_name; // Iterate across tokens in the opts for(std::sregex_token_iterator tok{ opt.get_opts().begin(), opt.get_opts().end(), program_options_regex, -1}; tok != std::sregex_token_iterator(); ++tok) { // The first option in a list of options is the canonical name if(!canonical_name.length()) canonical_name = tok->str(); // If the length of the option is 1, it is single-dash; otherwise double-dash const char* prefix = tok->length() == 1 ? 
"-" : "--"; // If option matches if(*argv == prefix + tok->str()) { ++argv; --argc; // If option has a value, set it; otherwise indicate option in set if(opt.get_val()) opt.set_val(argc, argv); else vm.insert(canonical_name); return; // Return successfully } } } // No options were matched if(ignoreUnknown) { ++argv; --argc; } else { throw std::invalid_argument(*argv); } } // Formatted output of command-line arguments description friend std::ostream& operator<<(std::ostream& os, const options_description& d) { // Iterate across all options for(const auto& opt : d.m_optlist) { bool first = true; const char* delim = ""; std::ostringstream left; // Iterate across tokens in the opts for(std::sregex_token_iterator tok{opt.get_opts().begin(), opt.get_opts().end(), program_options_regex, -1}; tok != std::sregex_token_iterator(); ++tok, first = false, delim = " ") { // If the length of the option is 1, it is single-dash; otherwise double-dash const char* prefix = tok->length() == 1 ? "-" : "--"; left << delim << (first ? "" : "[ ") << prefix << tok->str() << (first ? 
"" : " ]"); } // Print the default value of the variable type if it exists // We do not print the default value for bool const value_base* val = opt.get_val(); if(val && !dynamic_cast*>(val)) { left << " arg"; if(val->has_default()) { // We test all supported types with dynamic_cast and print accordingly left << " (="; if(dynamic_cast*>(val)) left << *dynamic_cast*>(val)->get_ptr(); else if(dynamic_cast*>(val)) left << *dynamic_cast*>(val)->get_ptr(); else if(dynamic_cast*>(val)) left << *dynamic_cast*>(val)->get_ptr(); else if(dynamic_cast*>(val)) left << *dynamic_cast*>(val)->get_ptr(); else if(dynamic_cast*>(val)) left << *dynamic_cast*>(val)->get_ptr(); else if(dynamic_cast*>(val)) left << *dynamic_cast*>(val)->get_ptr(); else if(dynamic_cast*>(val)) left << *dynamic_cast*>(val)->get_ptr(); else if(dynamic_cast*>(val)) left << *dynamic_cast*>(val)->get_ptr(); else throw std::logic_error("Internal error: Unsupported data type"); left << ")"; } } os << std::setw(36) << std::left << left.str() << " " << opt.get_desc() << "\n\n"; } return os << std::flush; } }; // Class representing command line parser class parse_command_line { variables_map m_vm; public: parse_command_line(int argc, char** argv, const options_description& desc, bool ignoreUnknown = false) { ++argv; // Skip argv[0] --argc; while(argc) desc.parse_option(argc, argv, m_vm, ignoreUnknown); } // Copy the variables_map friend void store(const parse_command_line& p, variables_map& vm) { vm = p.m_vm; } // Move the variables_map friend void store(parse_command_line&& p, variables_map& vm) { vm = std::move(p.m_vm); } }; // We can define the notify() function as a no-op for our purposes inline void notify(const variables_map&) {} rocSPARSE-rocm-5.7.1/clients/benchmarks/rocsparse_arguments_config.cpp000066400000000000000000001022571447342677400261110ustar00rootroot00000000000000/*! 
\file */ /* ************************************************************************ * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * * ************************************************************************ */ #include "rocsparse_arguments_config.hpp" #include "rocsparse_enum.hpp" rocsparse_arguments_config::rocsparse_arguments_config() { // // Arguments must be a C-compatible struct so cppcheck complains about non-initialized member variables. // Then we need to initialize. 
{ this->M = static_cast(0); this->N = static_cast(0); this->K = static_cast(0); this->nnz = static_cast(0); this->block_dim = static_cast(0); this->row_block_dimA = static_cast(0); this->col_block_dimA = static_cast(0); this->row_block_dimB = static_cast(0); this->col_block_dimB = static_cast(0); this->dimx = static_cast(0); this->dimy = static_cast(0); this->dimz = static_cast(0); this->ll = static_cast(0); this->l = static_cast(0); this->u = static_cast(0); this->uu = static_cast(0); this->index_type_I = static_cast(0); this->index_type_J = static_cast(0); this->compute_type = static_cast(0); this->alpha = static_cast(0); this->alphai = static_cast(0); this->beta = static_cast(0); this->betai = static_cast(0); this->threshold = static_cast(0); this->percentage = static_cast(0); this->transA = static_cast(0); this->transB = static_cast(0); this->baseA = static_cast(0); this->baseB = static_cast(0); this->baseC = static_cast(0); this->baseD = static_cast(0); this->action = static_cast(0); this->part = static_cast(0); this->matrix_type = static_cast(0); this->diag = static_cast(0); this->uplo = static_cast(0); this->storage = static_cast(0); this->apol = static_cast(0); this->spol = static_cast(0); this->direction = static_cast(0); this->order = static_cast(0); this->format = static_cast(0); this->itilu0_alg = rocsparse_itilu0_alg_default; this->sddmm_alg = rocsparse_sddmm_alg_default; this->spmv_alg = rocsparse_spmv_alg_default; this->spsv_alg = rocsparse_spsv_alg_default; this->spitsv_alg = rocsparse_spitsv_alg_default; this->spsm_alg = rocsparse_spsm_alg_default; this->spmm_alg = rocsparse_spmm_alg_default; this->spgemm_alg = rocsparse_spgemm_alg_default; this->sparse_to_dense_alg = rocsparse_sparse_to_dense_alg_default; this->dense_to_sparse_alg = rocsparse_dense_to_sparse_alg_default; this->gtsv_interleaved_alg = static_cast(0); this->gpsv_interleaved_alg = static_cast(0); this->matrix = static_cast(0); this->matrix_init_kind = static_cast(0); this->unit_check 
= static_cast(0); this->timing = static_cast(1); this->iters = static_cast(0); this->denseld = static_cast(0); this->batch_count = static_cast(0); this->batch_count_A = static_cast(0); this->batch_count_B = static_cast(0); this->batch_count_C = static_cast(0); this->batch_stride = static_cast(0); this->algo = static_cast(0); this->numericboost = static_cast(0); this->boosttol = static_cast(0); this->boostval = static_cast(0); this->boostvali = static_cast(0); this->tolm = static_cast(0); this->graph_test = static_cast(0); this->filename[0] = '\0'; this->function[0] = '\0'; this->name[0] = '\0'; this->category[0] = '\0'; this->hardware[0] = '\0'; } this->precision = 's'; this->indextype = 's'; } void rocsparse_arguments_config::set_description(options_description& desc) { desc.add_options()("help,h", "produces this help message") // clang-format off ("sizem,m", value(&this->M)->default_value(128), "Specific matrix size testing: sizem is only applicable to SPARSE-2 " "& SPARSE-3: the number of rows.") ("sizen,n", value(&this->N)->default_value(128), "Specific matrix/vector size testing: SPARSE-1: the length of the " "dense vector. 
SPARSE-2 & SPARSE-3: the number of columns") ("sizek,k", value(&this->K)->default_value(128), "Specific matrix/vector size testing: SPARSE-3: the number of columns") ("sizennz,z", value(&this->nnz)->default_value(32), "Specific vector size testing, LEVEL-1: the number of non-zero elements " "of the sparse vector.") ("blockdim", value(&this->block_dim)->default_value(2), "BSR block dimension (default: 2)") ("row-blockdimA", value(&this->row_block_dimA)->default_value(2), "General BSR row block dimension (default: 2)") ("col-blockdimA", value(&this->col_block_dimA)->default_value(2), "General BSR col block dimension (default: 2)") ("row-blockdimB", value(&this->row_block_dimB)->default_value(2), "General BSR row block dimension (default: 2)") ("col-blockdimB", value(&this->col_block_dimB)->default_value(2), "General BSR col block dimension (default: 2)") ("mtx", value(&this->b_matrixmarket)->default_value(""), "read from matrix " "market (.mtx) format. This will override parameters -m, -n, and -z.") ("rocalution", value(&this->b_rocalution)->default_value(""), "read from rocalution matrix binary file. This will override parameter --mtx") ("rocsparseio", value(&this->b_rocsparseio)->default_value(""), "read from rocsparseio matrix binary file. This will override parameter --rocalution") ("file", value(&this->b_file)->default_value(""), "read from file with file extension detection. This will override parameters --rocsparseio,rocalution,mtx") ("dimx", value(&this->dimx)->default_value(0), "assemble " "laplacian matrix with dimensions . dimz is optional. This " "will override parameters -m, -n, -z and --mtx.") ("dimy", value(&this->dimy)->default_value(0), "assemble " "laplacian matrix with dimensions . dimz is optional. This " "will override parameters -m, -n, -z and --mtx.") ("dimz", value(&this->dimz)->default_value(0), "assemble " "laplacian matrix with dimensions . dimz is optional. 
This " "will override parameters -m, -n, -z and --mtx.") ("diag_ll", value(&this->ll)->default_value(0), "assemble " "pentadiagonal matrix with stencil .") ("diag_l", value(&this->l)->default_value(0), "assemble " "tridiagonal matrix with stencil or pentadiagonal matrix with stencil .") ("diag_u", value(&this->u)->default_value(0), "assemble " "tridiagonal matrix with stencil or pentadiagonal matrix with stencil .") ("diag_uu", value(&this->uu)->default_value(0), "assemble " "pentadiagonal matrix with stencil .") ("alpha", value(&this->alpha)->default_value(1.0), "specifies the scalar alpha") ("beta", value(&this->beta)->default_value(0.0), "specifies the scalar beta") ("threshold", value(&this->threshold)->default_value(1.0), "specifies the scalar threshold") ("percentage", value(&this->percentage)->default_value(0.0), "specifies the scalar percentage") ("transposeA", value(&this->b_transA)->default_value('N'), "N = no transpose, T = transpose, C = conjugate transpose") ("transposeB", value(&this->b_transB)->default_value('N'), "N = no transpose, T = transpose, C = conjugate transpose, (default = N)") ("indexbaseA", value(&this->b_baseA)->default_value(0), "0 = zero-based indexing, 1 = one-based indexing, (default: 0)") ("indexbaseB", value(&this->b_baseB)->default_value(0), "0 = zero-based indexing, 1 = one-based indexing, (default: 0)") ("indexbaseC", value(&this->b_baseC)->default_value(0), "0 = zero-based indexing, 1 = one-based indexing, (default: 0)") ("indexbaseD", value(&this->b_baseD)->default_value(0), "0 = zero-based indexing, 1 = one-based indexing, (default: 0)") ("action", value(&this->b_action)->default_value(0), "0 = rocsparse_action_numeric, 1 = rocsparse_action_symbolic, (default: 0)") ("hybpart", value(&this->b_part)->default_value(0), "0 = rocsparse_hyb_partition_auto, 1 = rocsparse_hyb_partition_user,\n" "2 = rocsparse_hyb_partition_max, (default: 0)") ("matrix_type", value(&this->b_matrix_type)->default_value(0), "0 = 
rocsparse_matrix_type_general, 1 = rocsparse_matrix_type_symmetric,\n" "2 = rocsparse_matrix_type_hermitian, 3 = rocsparse_matrix_type_triangular, (default: 0)") ("diag", value(&this->b_diag)->default_value('N'), "N = non-unit diagonal, U = unit diagonal, (default = N)") ("uplo", value(&this->b_uplo)->default_value('L'), "L = lower fill, U = upper fill, (default = L)") ("storage", value(&this->b_storage)->default_value(0), "0 = rocsparse_storage_mode_sorted, 1 = rocsparse_storage_mode_unsorted, (default = 0)") ("apolicy", value(&this->b_apol)->default_value('R'), "R = reuse meta data, F = force re-build, (default = R)") ("function,f", value(&this->function_name)->default_value("axpyi"), "SPARSE function to test. Options:\n" " Level1: axpyi, doti, dotci, gthr, gthrz, roti, sctr\n" " Level2: bsrmv, bsrxmv, bsrsv, coomv, coomv_aos, csrmv, csrmv_managed, csrsv, csritsv, coosv, ellmv, hybmv, gebsrmv, gemvi\n" " Level3: bsrmm, bsrsm, gebsrmm, csrmm, csrmm_batched, coomm, coomm_batched, cscmm, cscmm_batched, csrsm, coosm, gemmi, sddmm\n" " Extra: bsrgeam, bsrgemm, csrgeam, csrgemm, csrgemm_reuse\n" " Preconditioner: bsric0, bsrilu0, csric0, csrilu0, csritilu0, gtsv, gtsv_no_pivot, gtsv_no_pivot_strided_batch, gtsv_interleaved_batch, gpsv_interleaved_batch\n" " Conversion: csr2coo, csr2csc, gebsr2gebsc, csr2ell, csr2hyb, csr2bsr, csr2gebsr\n" " coo2csr, ell2csr, hyb2csr, dense2csr, dense2coo, prune_dense2csr, prune_dense2csr_by_percentage, dense2csc\n" " csr2dense, csc2dense, coo2dense, bsr2csr, gebsr2csr, gebsr2gebsr, csr2csr_compress, prune_csr2csr, prune_csr2csr_by_percentage\n" " sparse_to_dense_coo, sparse_to_dense_csr, sparse_to_dense_csc, dense_to_sparse_coo, dense_to_sparse_csr, dense_to_sparse_csc\n" " Sorting: cscsort, csrsort, coosort\n" " Misc: identity, nnz\n" " Util: check_matrix_csr, check_matrix_csc, check_matrix_coo, check_matrix_gebsr, check_matrix_gebsc, check_matrix_ell, check_matrix_hyb") ("indextype", value(&this->indextype)->default_value('s'), 
"Specify index types to be int32_t (s), int64_t (d) or mixed (m). Options: s,d,m") ("precision,r", value(&this->precision)->default_value('s'), "Options: s,d,c,z") ("verify,v", value(&this->unit_check)->default_value(0), "Validate GPU results with CPU? 0 = No, 1 = Yes (default: No)") ("iters,i", value(&this->iters)->default_value(10), "Iterations to run inside timing loop") ("device,d", value(&this->device_id)->default_value(0), "Set default device to be used for subsequent program runs") ("direction", value(&this->b_dir)->default_value(rocsparse_direction_row), "Indicates whether BSR blocks should be laid out in row-major storage or by column-major storage: row-major storage = 0, column-major storage = 1 (default: 0)") ("order", value(&this->b_order)->default_value(rocsparse_order_column), "Indicates whether a dense matrix is laid out in column-major storage: 1, or row-major storage 0 (default: 1)") ("format", value(&this->b_format)->default_value(rocsparse_format_coo), "Indicates whether a sparse matrix is laid out in coo format: 0, coo_aos format: 1, csr format: 2, csc format: 3 or ell format: 4 (default:0)") ("denseld", value(&this->denseld)->default_value(128), "Indicates the leading dimension of a dense matrix >= M, assuming a column-oriented storage.") ("batch_count", value(&this->batch_count)->default_value(128), "Indicates the batch count for batched routines.") ("batch_count_A", value(&this->batch_count_A)->default_value(128), "Indicates the batch count for the sparse A matrix in spmm batched routines.") ("batch_count_B", value(&this->batch_count_B)->default_value(128), "Indicates the batch count for the dense B matrix in spmm batched routines.") ("batch_count_C", value(&this->batch_count_C)->default_value(128), "Indicates the batch count for the dense C matrix in spmm batched routines.") ("batch_stride", value(&this->batch_stride)->default_value(128), "Indicates the batch stride for batched routines.") #ifdef ROCSPARSE_WITH_MEMSTAT ("memstat-report", 
value(&this->b_memory_report_filename)->default_value("rocsparse_bench_memstat.json"), "Output filename for memory report.") #endif ("spmv_alg", value(&this->b_spmv_alg)->default_value(rocsparse_spmv_alg_default), "Indicates what algorithm to use when running SpMV. Possibly choices are default: 0, COO: 1, CSR adaptive: 2, CSR stream: 3, ELL: 4, COO atomic: 5 (default:0)") ("itilu0_alg", value(&this->b_itilu0_alg)->default_value(rocsparse_itilu0_alg_default), "Indicates what algorithm to use when running Iterative ILU0. see documentation.") ("spmm_alg", value(&this->b_spmm_alg)->default_value(rocsparse_spmm_alg_default), "Indicates what algorithm to use when running SpMM. Possibly choices are default: 0, CSR: 1, COO segmented: 2, COO atomic: 3, CSR row split: 4, CSR merge: 5, COO segmented atomic: 6, BELL: 7 (default:0)") ("gtsv_interleaved_alg", value(&this->b_gtsv_interleaved_alg)->default_value(rocsparse_gtsv_interleaved_alg_default), "Indicates what algorithm to use when running rocsparse_gtsv_interleaved_batch. 
Possibly choices are thomas: 1, lu: 2, qr: 3 (default:3)"); } int rocsparse_arguments_config::parse(int&argc,char**&argv, options_description&desc) { variables_map vm; store(parse_command_line(argc, argv, desc, sizeof(rocsparse_arguments_config)), vm); notify(vm); if(vm.count("help")) { std::cout << desc << std::endl; return -2; } if(this->b_dir != rocsparse_direction_row && this->b_dir != rocsparse_direction_column) { std::cerr << "Invalid value for --direction" << std::endl; return -1; } if(this->b_order != rocsparse_order_row && this->b_order != rocsparse_order_column) { std::cerr << "Invalid value for --order" << std::endl; return -1; } if(this->b_format != rocsparse_format_csr && this->b_format != rocsparse_format_coo && this->b_format != rocsparse_format_coo_aos && this->b_format != rocsparse_format_ell && this->b_format != rocsparse_format_csc) { std::cerr << "Invalid value for --format" << std::endl; return -1; } if (rocsparse_itilu0_alg_t::is_invalid(this->b_itilu0_alg)) { std::cerr << "Invalid value '" << this->b_itilu0_alg << "' for --itilu0_alg, valid values are : ("; rocsparse_itilu0_alg_t::info(std::cerr); std::cerr << ")" << std::endl; return -1; } if(this->b_spmv_alg != rocsparse_spmv_alg_default && this->b_spmv_alg != rocsparse_spmv_alg_coo && this->b_spmv_alg != rocsparse_spmv_alg_csr_adaptive && this->b_spmv_alg != rocsparse_spmv_alg_csr_stream && this->b_spmv_alg != rocsparse_spmv_alg_ell && this->b_spmv_alg != rocsparse_spmv_alg_coo_atomic) { std::cerr << "Invalid value for --spmv_alg" << std::endl; return -1; } if(this->b_spmm_alg != rocsparse_spmm_alg_default && this->b_spmm_alg != rocsparse_spmm_alg_csr && this->b_spmm_alg != rocsparse_spmm_alg_coo_segmented && this->b_spmm_alg != rocsparse_spmm_alg_coo_atomic && this->b_spmm_alg != rocsparse_spmm_alg_csr_row_split && this->b_spmm_alg != rocsparse_spmm_alg_csr_merge && this->b_spmm_alg != rocsparse_spmm_alg_coo_segmented_atomic && this->b_spmm_alg != rocsparse_spmm_alg_bell) { std::cerr << 
"Invalid value for --spmm_alg" << std::endl; return -1; } if(this->b_gtsv_interleaved_alg != rocsparse_gtsv_interleaved_alg_default && this->b_gtsv_interleaved_alg != rocsparse_gtsv_interleaved_alg_thomas && this->b_gtsv_interleaved_alg != rocsparse_gtsv_interleaved_alg_lu && this->b_gtsv_interleaved_alg != rocsparse_gtsv_interleaved_alg_qr) { std::cerr << "Invalid value for --gtsv_interleaved_alg" << std::endl; return -1; } if(this->b_transA == 'N') { this->transA = rocsparse_operation_none; } else if(this->b_transA == 'T') { this->transA = rocsparse_operation_transpose; } else if(this->b_transA == 'C') { this->transA = rocsparse_operation_conjugate_transpose; } if(this->b_transB == 'N') { this->transB = rocsparse_operation_none; } else if(this->b_transB == 'T') { this->transB = rocsparse_operation_transpose; } else if(this->b_transB == 'C') { this->transB = rocsparse_operation_conjugate_transpose; } sprintf(this->function,"%s",this->function_name.c_str()); this->baseA = (this->b_baseA == 0) ? rocsparse_index_base_zero : rocsparse_index_base_one; this->baseB = (this->b_baseB == 0) ? rocsparse_index_base_zero : rocsparse_index_base_one; this->baseC = (this->b_baseC == 0) ? rocsparse_index_base_zero : rocsparse_index_base_one; this->baseD = (this->b_baseD == 0) ? rocsparse_index_base_zero : rocsparse_index_base_one; this->action = (this->b_action == 0) ? rocsparse_action_numeric : rocsparse_action_symbolic; this->part = (this->b_part == 0) ? rocsparse_hyb_partition_auto : (this->b_part == 1) ? rocsparse_hyb_partition_user : rocsparse_hyb_partition_max; this->matrix_type = (this->b_matrix_type == 0) ? rocsparse_matrix_type_general : (this->b_matrix_type == 1) ? rocsparse_matrix_type_symmetric : (this->b_matrix_type == 2) ? rocsparse_matrix_type_hermitian : rocsparse_matrix_type_triangular; this->diag = (this->b_diag == 'N') ? rocsparse_diag_type_non_unit : rocsparse_diag_type_unit; this->uplo = (this->b_uplo == 'L') ? 
rocsparse_fill_mode_lower : rocsparse_fill_mode_upper; this->storage = (this->b_storage == 0) ? rocsparse_storage_mode_sorted : rocsparse_storage_mode_unsorted; this->apol = (this->b_apol == 'R') ? rocsparse_analysis_policy_reuse : rocsparse_analysis_policy_force; this->spol = rocsparse_solve_policy_auto; this->direction = (this->b_dir == rocsparse_direction_row) ? rocsparse_direction_row : rocsparse_direction_column; this->order = (this->b_order == rocsparse_order_row) ? rocsparse_order_row : rocsparse_order_column; this->format = (rocsparse_format)this->b_format; this->spmv_alg = (rocsparse_spmv_alg)this->b_spmv_alg; this->itilu0_alg = (rocsparse_itilu0_alg)this->b_itilu0_alg; this->spmm_alg = (rocsparse_spmm_alg)this->b_spmm_alg; this->gtsv_interleaved_alg = (rocsparse_gtsv_interleaved_alg)this->b_gtsv_interleaved_alg; #ifdef ROCSPARSE_WITH_MEMSTAT rocsparse_status status = rocsparse_memstat_report(this->b_memory_report_filename.c_str()); if (status != rocsparse_status_success) { std::cerr << "rocsparse_memstat_report failed " << std::endl; return -1; } #endif // rocALUTION parameter overrides filename parameter if(this->b_file != "") { strcpy(this->filename, this->b_file.c_str()); } else if(this->b_rocsparseio != "") { strcpy(this->filename, this->b_rocsparseio.c_str()); this->matrix = rocsparse_matrix_file_rocsparseio; } else if(this->b_rocalution != "") { strcpy(this->filename, this->b_rocalution.c_str()); this->matrix = rocsparse_matrix_file_rocalution; } else if(this->dimx != 0 && this->dimy != 0 && this->dimz != 0) { this->matrix = rocsparse_matrix_laplace_3d; } else if(this->dimx != 0 && this->dimy != 0) { this->matrix = rocsparse_matrix_laplace_2d; } else if(this->b_matrixmarket != "") { strcpy(this->filename, this->b_matrixmarket.c_str()); this->matrix = rocsparse_matrix_file_mtx; } else if(this->ll == 0 && this->l != 0 && this->u != 0 && this->uu == 0) { this->matrix = rocsparse_matrix_tridiagonal; } else if(this->ll != 0 && this->l != 0 && this->u != 
0 && this->uu != 0) { this->matrix = rocsparse_matrix_pentadiagonal; } else { this->matrix = rocsparse_matrix_random; } this->matrix_init_kind = rocsparse_matrix_init_kind_default; /* ============================================================================================ */ if(this->M < 0 || this->N < 0) { std::cerr << "Invalid dimension" << std::endl; return -1; } if(this->block_dim < 1) { std::cerr << "Invalid value for --blockdim" << std::endl; return -1; } if(this->row_block_dimA < 1) { std::cerr << "Invalid value for --row-blockdimA" << std::endl; return -1; } if(this->col_block_dimA < 1) { std::cerr << "Invalid value for --col-blockdimA" << std::endl; return -1; } if(this->row_block_dimB < 1) { std::cerr << "Invalid value for --row-blockdimB" << std::endl; return -1; } if(this->col_block_dimB < 1) { std::cerr << "Invalid value for --col-blockdimB" << std::endl; return -1; } switch(this->indextype) { case 's': { this->index_type_I = rocsparse_indextype_i32; this->index_type_J = rocsparse_indextype_i32; break; } case 'd': { this->index_type_I = rocsparse_indextype_i64; this->index_type_J = rocsparse_indextype_i64; break; } case 'm': { this->index_type_I = rocsparse_indextype_i64; this->index_type_J = rocsparse_indextype_i32; break; } default: { std::cerr << "Invalid value for --indextype" << std::endl; return -1; } } switch(this->precision) { case 's': { this->compute_type = rocsparse_datatype_f32_r; break; } case 'd': { this->compute_type = rocsparse_datatype_f64_r; break; } case 'c': { this->compute_type = rocsparse_datatype_f32_c; break; } case 'z': { this->compute_type = rocsparse_datatype_f64_c; break; } default: { std::cerr << "Invalid value for --precision" << std::endl; return -1; } } return 0; } int rocsparse_arguments_config::parse_no_default(int&argc,char**&argv, options_description&desc) { variables_map vm; store(parse_command_line(argc, argv, desc), vm); notify(vm); if(vm.count("help")) { std::cout << desc << std::endl; return -2; } 
if(this->b_dir != rocsparse_direction_row && this->b_dir != rocsparse_direction_column) { std::cerr << "Invalid value for --direction" << std::endl; return -1; } if(this->b_order != rocsparse_order_row && this->b_order != rocsparse_order_column) { std::cerr << "Invalid value for --order" << std::endl; return -1; } if(this->b_format != rocsparse_format_csr && this->b_format != rocsparse_format_coo && this->b_format != rocsparse_format_coo_aos && this->b_format != rocsparse_format_ell && this->b_format != rocsparse_format_csc) { std::cerr << "Invalid value for --format" << std::endl; return -1; } if(this->b_spmv_alg != rocsparse_spmv_alg_default && this->b_spmv_alg != rocsparse_spmv_alg_coo && this->b_spmv_alg != rocsparse_spmv_alg_csr_adaptive && this->b_spmv_alg != rocsparse_spmv_alg_csr_stream && this->b_spmv_alg != rocsparse_spmv_alg_ell && this->b_spmv_alg != rocsparse_spmv_alg_coo_atomic) { std::cerr << "Invalid value for --spmv_alg" << std::endl; return -1; } if(this->b_spmm_alg != rocsparse_spmm_alg_default && this->b_spmm_alg != rocsparse_spmm_alg_csr && this->b_spmm_alg != rocsparse_spmm_alg_coo_segmented && this->b_spmm_alg != rocsparse_spmm_alg_coo_atomic && this->b_spmm_alg != rocsparse_spmm_alg_csr_row_split && this->b_spmm_alg != rocsparse_spmm_alg_csr_merge && this->b_spmm_alg != rocsparse_spmm_alg_coo_segmented_atomic && this->b_spmm_alg != rocsparse_spmm_alg_bell) { std::cerr << "Invalid value for --spmm_alg" << std::endl; return -1; } if(this->b_gtsv_interleaved_alg != rocsparse_gtsv_interleaved_alg_default && this->b_gtsv_interleaved_alg != rocsparse_gtsv_interleaved_alg_thomas && this->b_gtsv_interleaved_alg != rocsparse_gtsv_interleaved_alg_lu && this->b_gtsv_interleaved_alg != rocsparse_gtsv_interleaved_alg_qr) { std::cerr << "Invalid value for --gtsv_interleaved_alg" << std::endl; return -1; } if(b_transA == 'N') { this->transA = rocsparse_operation_none; } else if(b_transA == 'T') { this->transA = rocsparse_operation_transpose; } else 
if(b_transA == 'C') { this->transA = rocsparse_operation_conjugate_transpose; } if(b_transB == 'N') { this->transB = rocsparse_operation_none; } else if(b_transB == 'T') { this->transB = rocsparse_operation_transpose; } else if(b_transB == 'C') { this->transB = rocsparse_operation_conjugate_transpose; } sprintf(this->function,"%s",this->function_name.c_str()); this->baseA = (b_baseA == 0) ? rocsparse_index_base_zero : rocsparse_index_base_one; this->baseB = (b_baseB == 0) ? rocsparse_index_base_zero : rocsparse_index_base_one; this->baseC = (b_baseC == 0) ? rocsparse_index_base_zero : rocsparse_index_base_one; this->baseD = (b_baseD == 0) ? rocsparse_index_base_zero : rocsparse_index_base_one; this->action = (b_action == 0) ? rocsparse_action_numeric : rocsparse_action_symbolic; this->part = (b_part == 0) ? rocsparse_hyb_partition_auto : (b_part == 1) ? rocsparse_hyb_partition_user : rocsparse_hyb_partition_max; this->diag = (b_diag == 'N') ? rocsparse_diag_type_non_unit : rocsparse_diag_type_unit; this->uplo = (b_uplo == 'L') ? rocsparse_fill_mode_lower : rocsparse_fill_mode_upper; this->storage= (b_storage == 0) ? rocsparse_storage_mode_sorted : rocsparse_storage_mode_unsorted; this->apol = (b_apol == 'R') ? rocsparse_analysis_policy_reuse : rocsparse_analysis_policy_force; this->spol = rocsparse_solve_policy_auto; this->direction = (b_dir == rocsparse_direction_row) ? rocsparse_direction_row : rocsparse_direction_column; this->order = (b_order == rocsparse_order_row) ? 
rocsparse_order_row : rocsparse_order_column; this->format = (rocsparse_format)b_format; this->spmv_alg = (rocsparse_spmv_alg)this->b_spmv_alg; this->spmm_alg = (rocsparse_spmm_alg)this->b_spmm_alg; this->gtsv_interleaved_alg = (rocsparse_gtsv_interleaved_alg)this->b_gtsv_interleaved_alg; // rocALUTION parameter overrides filename parameter if(b_file != "") { strcpy(this->filename, b_file.c_str()); const char * p = b_file.c_str(); const char * q = nullptr; while(*p!='\0') { if (*p=='.') q = p; ++p;} if (q==nullptr) { std::cerr << "extension is not detected in filename '"<< b_file <<"' " << std::endl; return -1; } if (!strcmp(q,".mtx")) { this->matrix = rocsparse_matrix_file_mtx; } else if (!strcmp(q,".bin")) { this->matrix = rocsparse_matrix_file_rocsparseio; } else if (!strcmp(q,".csr")) { this->matrix = rocsparse_matrix_file_rocalution; } } else if(b_rocsparseio != "") { strcpy(this->filename, b_rocsparseio.c_str()); this->matrix = rocsparse_matrix_file_rocsparseio; } else if(b_rocalution != "") { strcpy(this->filename, b_rocalution.c_str()); this->matrix = rocsparse_matrix_file_rocalution; } else if(this->dimx != 0 && this->dimy != 0 && this->dimz != 0) { this->matrix = rocsparse_matrix_laplace_3d; } else if(this->dimx != 0 && this->dimy != 0) { this->matrix = rocsparse_matrix_laplace_2d; } else if(b_matrixmarket != "") { strcpy(this->filename, b_matrixmarket.c_str()); this->matrix = rocsparse_matrix_file_mtx; } else { this->matrix = rocsparse_matrix_random; } this->matrix_init_kind = rocsparse_matrix_init_kind_default; /* ============================================================================================ */ if(this->M < 0 || this->N < 0) { std::cerr << "Invalid dimension" << std::endl; return -1; } if(this->block_dim < 1) { std::cerr << "Invalid value for --blockdim" << std::endl; return -1; } if(this->row_block_dimA < 1) { std::cerr << "Invalid value for --row-blockdimA" << std::endl; return -1; } if(this->col_block_dimA < 1) { std::cerr << "Invalid 
value for --col-blockdimA" << std::endl; return -1; } if(this->row_block_dimB < 1) { std::cerr << "Invalid value for --row-blockdimB" << std::endl; return -1; } if(this->col_block_dimB < 1) { std::cerr << "Invalid value for --col-blockdimB" << std::endl; return -1; } switch(this->indextype) { case 's': { this->index_type_I = rocsparse_indextype_i32; this->index_type_J = rocsparse_indextype_i32; break; } case 'd': { this->index_type_I = rocsparse_indextype_i64; this->index_type_J = rocsparse_indextype_i64; break; } case 'm': { this->index_type_I = rocsparse_indextype_i64; this->index_type_J = rocsparse_indextype_i32; break; } default: { std::cerr << "Invalid value for --indextype" << std::endl; return -1; } } switch(this->precision) { case 's': { this->compute_type = rocsparse_datatype_f32_r; break; } case 'd': { this->compute_type = rocsparse_datatype_f64_r; break; } case 'c': { this->compute_type = rocsparse_datatype_f32_c; break; } case 'z': { this->compute_type = rocsparse_datatype_f64_c; break; } default: { std::cerr << "Invalid value for --precision" << std::endl; return -1; } } return 0; } rocSPARSE-rocm-5.7.1/clients/benchmarks/rocsparse_arguments_config.hpp000066400000000000000000000052001447342677400261040ustar00rootroot00000000000000/*! \file */ /* ************************************************************************ * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
/*! \brief Benchmark client configuration: the Arguments members plus the raw command
 *         line values they are derived from.
 *
 *  set_description() binds the members below to command line options; parse() and
 *  parse_no_default() read the command line, validate the raw values and convert them
 *  into the typed members inherited from Arguments.
 */
struct rocsparse_arguments_config : Arguments
{
public:
    // Precision selector: 's', 'd', 'c' or 'z' (mapped to compute_type by the parsers).
    char precision{};
    // Index type selector: 's', 'd' or 'm' (mapped to index_type_I / index_type_J).
    char indextype{};
    // Name of the routine to run; copied into the inherited `function` buffer.
    std::string function_name{};
    // Device to run on.
    rocsparse_int device_id{};

private:
    // Raw matrix source filenames; which one is used is decided by the parsers.
    std::string b_matrixmarket{};
    std::string b_rocalution{};
    std::string b_rocsparseio{};
    std::string b_file{};
    // Raw operation characters: 'N', 'T' or 'C'.
    char b_transA{};
    char b_transB{};
    // Raw index bases: 0 selects zero-based, anything else one-based.
    int b_baseA{};
    int b_baseB{};
    int b_baseC{};
    int b_baseD{};
    // Raw selectors converted to the matching rocsparse enumerations by the parsers.
    int  b_action{};
    int  b_part{};
    int  b_matrix_type{};
    char b_diag{};
    char b_uplo{};
    int  b_storage{};
    char b_apol{};
    // Raw enumeration values, validated by the parsers before being cast.
    rocsparse_int b_dir{};
    rocsparse_int b_order{};
    rocsparse_int b_format{};
    rocsparse_int b_itilu0_alg{};
    rocsparse_int b_spmv_alg{};
    rocsparse_int b_spmm_alg{};
    rocsparse_int b_gtsv_interleaved_alg{};

#ifdef ROCSPARSE_WITH_MEMSTAT
    // Output filename passed to rocsparse_memstat_report by parse().
    std::string b_memory_report_filename{};
#endif

public:
    rocsparse_arguments_config();

    // Registers the command line options on \p desc.
    void set_description(options_description& desc);

    // Both return 0 on success, -1 on an invalid value and -2 when "help" was given.
    int parse(int& argc, char**& argv, options_description& desc);
    int parse_no_default(int& argc, char**& argv, options_description& desc);
};
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * * ************************************************************************ */ #include #include #include "rocsparse_bench.hpp" #include "rocsparse_bench_cmdlines.hpp" // Return version. std::string rocsparse_get_version() { int rocsparse_ver; char rocsparse_rev[64]; { rocsparse_handle handle; rocsparse_create_handle(&handle); rocsparse_get_version(handle, &rocsparse_ver); rocsparse_get_git_rev(handle, rocsparse_rev); rocsparse_destroy_handle(handle); } std::ostringstream os; os << rocsparse_ver / 100000 << "." << rocsparse_ver / 100 % 1000 << "." 
<< rocsparse_ver % 100 << "-" << rocsparse_rev; return os.str(); } void rocsparse_bench::parse(int& argc, char**& argv, rocsparse_arguments_config& config) { config.set_description(this->desc); config.unit_check = 0; config.timing = 1; config.alphai = 0.0; config.betai = 0.0; config.threshold = 0.0; config.percentage = 0.0; config.itilu0_alg = rocsparse_itilu0_alg_default; config.sddmm_alg = rocsparse_sddmm_alg_default; config.spmv_alg = rocsparse_spmv_alg_default; config.spsv_alg = rocsparse_spsv_alg_default; config.spitsv_alg = rocsparse_spitsv_alg_default; config.spsm_alg = rocsparse_spsm_alg_default; config.spmm_alg = rocsparse_spmm_alg_default; config.spgemm_alg = rocsparse_spgemm_alg_default; config.sparse_to_dense_alg = rocsparse_sparse_to_dense_alg_default; config.dense_to_sparse_alg = rocsparse_dense_to_sparse_alg_default; config.precision = 's'; config.indextype = 's'; int i = config.parse(argc, argv, this->desc); if(i == -1) { throw rocsparse_status_internal_error; } else if(i == -2) { // // Help. 
// rocsparse_bench_cmdlines::help(std::cout); exit(0); } } rocsparse_bench::rocsparse_bench() : desc("rocsparse client command line options") { } rocsparse_bench::rocsparse_bench(int& argc, char**& argv) : desc("rocsparse client command line options") { this->parse(argc, argv, this->config); routine(this->config.function_name.c_str()); // Device query int devs; if(hipGetDeviceCount(&devs) != hipSuccess) { std::cerr << "Error: cannot get device count" << std::endl; exit(-1); } auto device_id = this->config.device_id; // Set device if(hipSetDevice(device_id) != hipSuccess || device_id >= devs) { std::cerr << "Error: cannot set device ID " << device_id << std::endl; exit(-1); } } rocsparse_bench& rocsparse_bench::operator()(int& argc, char**& argv) { this->parse(argc, argv, this->config); routine(this->config.function_name.c_str()); return *this; } rocsparse_status rocsparse_bench::run() { return this->routine.dispatch(this->config.precision, this->config.indextype, this->config); } rocsparse_int rocsparse_bench::get_device_id() const { return this->config.device_id; } // This is used for backward compatibility. void rocsparse_bench::info_devices(std::ostream& out_) const { int devs; if(hipGetDeviceCount(&devs) != hipSuccess) { std::cerr << "Error: cannot get device count" << std::endl; exit(1); } std::cout << "Query device success: there are " << devs << " devices" << std::endl; for(int i = 0; i < devs; ++i) { hipDeviceProp_t prop; if(hipGetDeviceProperties(&prop, i) != hipSuccess) { std::cerr << "Error: cannot get device properties" << std::endl; exit(1); } out_ << "Device ID " << i << ": " << prop.name << std::endl; gpu_config g(prop); g.print(out_); } // // Print header. 
// { rocsparse_int device_id = this->get_device_id(); hipDeviceProp_t prop; hipGetDeviceProperties(&prop, device_id); out_ << "Using device ID " << device_id << " (" << prop.name << ") for rocSPARSE" << std::endl << "-------------------------------------------------------------------------" << std::endl << "rocSPARSE version: " << rocsparse_get_version() << std::endl << std::endl; } } rocSPARSE-rocm-5.7.1/clients/benchmarks/rocsparse_bench.hpp000066400000000000000000000103271447342677400236370ustar00rootroot00000000000000/*! \file */ /* ************************************************************************ * Copyright (C) 2021 Advanced Micro Devices, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
* * ************************************************************************ */ #pragma once #include #include #include "rocsparse_arguments_config.hpp" #include "rocsparse_routine.hpp" struct gpu_config { char name[32]; long memory_MB; long clockRate_MHz; long major; long minor; long maxGridSizeX; long sharedMemPerBlock_KB; long maxThreadsPerBlock; long warpSize; explicit gpu_config(const hipDeviceProp_t& prop) { strcpy(this->name, prop.name); this->memory_MB = (prop.totalGlobalMem >> 20); this->clockRate_MHz = prop.clockRate / 1000; this->major = prop.major; this->minor = prop.minor; this->maxGridSizeX = prop.maxGridSize[0]; this->sharedMemPerBlock_KB = (prop.sharedMemPerBlock >> 10); this->maxThreadsPerBlock = prop.maxThreadsPerBlock; this->warpSize = prop.warpSize; } void print(std::ostream& out_) { out_ << "-------------------------------------------------------------------------" << std::endl << "with " << this->memory_MB << "MB memory, clock rate " << this->clockRate_MHz << "MHz @ computing capability " << this->major << "." << this->minor << std::endl << "maxGridDimX " << this->maxGridSizeX << ", sharedMemPerBlock " << this->sharedMemPerBlock_KB << "KB, maxThreadsPerBlock " << this->maxThreadsPerBlock << std::endl << "wavefrontSize " << this->warpSize << std::endl << "-------------------------------------------------------------------------" << std::endl; } void print_json(std::ostream& out) { out << std::endl << "\"config gpu\": {" << std::endl << " \"memory\" : \"" << this->memory_MB << "\"," << std::endl << " \"clockrate\" : \"" << this->clockRate_MHz << "\"," << std::endl << " \"capability\" : \"" << this->major << "." 
<< this->minor << "\"," << std::endl << " \"dimension\" : \"" << this->maxGridSizeX << "\"," << std::endl << " \"shared memory\" : \"" << this->sharedMemPerBlock_KB << "\"," << std::endl << " \"max thread per block\": \"" << this->maxThreadsPerBlock << "\"," << std::endl << " \"wavefront size\" : \"" << this->warpSize << "\"}," << std::endl; } }; class rocsparse_bench { private: void parse(int& argc, char**& argv, rocsparse_arguments_config& config); options_description desc; rocsparse_arguments_config config{}; rocsparse_routine routine{}; public: rocsparse_bench(); rocsparse_bench(int& argc, char**& argv); rocsparse_bench& operator()(int& argc, char**& argv); rocsparse_status run(); rocsparse_int get_device_id() const; void info_devices(std::ostream& out_) const; }; std::string rocsparse_get_version(); rocSPARSE-rocm-5.7.1/clients/benchmarks/rocsparse_bench_app.cpp000066400000000000000000000351421447342677400244740ustar00rootroot00000000000000/*! \file */ /* ************************************************************************ * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * * ************************************************************************ */ #include "rocsparse_bench_app.hpp" #include "rocsparse_bench.hpp" #include "rocsparse_random.hpp" #include rocsparse_bench_app* rocsparse_bench_app::s_instance = nullptr; rocsparse_bench_app_base::rocsparse_bench_app_base(int argc, char** argv) : m_initial_argc(rocsparse_bench_app_base::save_initial_cmdline(argc, argv, &m_initial_argv)) , m_bench_cmdlines(argc, argv) , m_bench_timing(m_bench_cmdlines.get_nsamples(), m_bench_cmdlines.get_nruns()) {}; rocsparse_status rocsparse_bench_app_base::run_case(int isample, int irun, int argc, char** argv) { rocsparse_bench bench(argc, argv); return bench.run(); } rocsparse_status rocsparse_bench_app_base::run_cases() { int sample_argc; char** sample_argv = nullptr; // // Loop over cases. // int nruns = this->m_bench_cmdlines.get_nruns(); int nsamples = this->m_bench_cmdlines.get_nsamples(); if(is_stdout_disabled()) { printf("// start benchmarking ... (nsamples = %d, nruns = %d)\n", nsamples, nruns); } for(int isample = 0; isample < nsamples; ++isample) { this->m_isample = isample; // // Add an item to collect data through rocsparse_record_timing // for(int irun = 0; irun < nruns; ++irun) { this->m_irun = irun; // // Get command line arguments, copy each time since it is mutable afterwards. // if(sample_argv == nullptr) { this->m_bench_cmdlines.get_argc(this->m_isample, sample_argc); sample_argv = new char*[sample_argc]; } this->m_bench_cmdlines.get(this->m_isample, sample_argc, sample_argv); // // Run the case. 
// rocsparse_status status = this->run_case(this->m_isample, this->m_irun, sample_argc, sample_argv); if(status != rocsparse_status_success) { std::cerr << "run_cases::run_case failed at line " << __LINE__ << std::endl; return status; } if(is_stdout_disabled()) { if((isample * nruns + irun) % 10 == 0) { fprintf(stdout, "\r// %2.0f%%", (double(isample * nruns + irun + 1) / double(nsamples * nruns)) * 100); fflush(stdout); } } } } if(is_stdout_disabled()) { printf("\r// benchmarking done.\n"); } if(sample_argv != nullptr) { delete[] sample_argv; } return rocsparse_status_success; }; rocsparse_bench_app::rocsparse_bench_app(int argc, char** argv) : rocsparse_bench_app_base(argc, argv) { } rocsparse_bench_app::~rocsparse_bench_app() {} void rocsparse_bench_app::confidence_interval(const double alpha, const int resize, const int nboots, const std::vector& v, double interval[2]) { const size_t size = v.size(); std::vector medians(nboots); std::vector resample(resize); #define median_value(n__, s__) \ ((n__ % 2 == 0) ? (s__[n__ / 2 - 1] + s__[n__ / 2]) * 0.5 : s__[n__ / 2]) for(int iboot = 0; iboot < nboots; ++iboot) { for(int i = 0; i < resize; ++i) { const int j = random_generator_exact(0, size - 1); resample[i] = v[j]; } std::sort(resample.begin(), resample.end()); medians[iboot] = median_value(resize, resample); } std::sort(medians.begin(), medians.end()); interval[0] = medians[int(floor(nboots * 0.5 * (1.0 - alpha)))]; interval[1] = medians[int(ceil(nboots * (1.0 - 0.5 * (1.0 - alpha))))]; #undef median_value } void rocsparse_bench_app::export_item(std::ostream& out, rocsparse_bench_timing_t::item_t& item) { // // // auto N = item.m_nruns; if(N > 1) { const double alpha = 0.95; std::sort(item.msec.begin(), item.msec.end()); std::sort(item.gflops.begin(), item.gflops.end()); std::sort(item.gbs.begin(), item.gbs.end()); double msec = (N % 2 == 0) ? (item.msec[N / 2 - 1] + item.msec[N / 2]) * 0.5 : item.msec[N / 2]; double gflops = (N % 2 == 0) ? 
(item.gflops[N / 2 - 1] + item.gflops[N / 2]) * 0.5 : item.gflops[N / 2]; double gbs = (N % 2 == 0) ? (item.gbs[N / 2 - 1] + item.gbs[N / 2]) * 0.5 : item.gbs[N / 2]; double interval_msec[2], interval_gflops[2], interval_gbs[2]; int nboots = 200; confidence_interval(alpha, 10, nboots, item.msec, interval_msec); confidence_interval(alpha, 10, nboots, item.gflops, interval_gflops); confidence_interval(alpha, 10, nboots, item.gbs, interval_gbs); out << std::endl << " \"time\": [\"" << msec << "\", \"" << interval_msec[0] << "\", \"" << interval_msec[1] << "\"]," << std::endl; out << " \"flops\": [\"" << gflops << "\", \"" << interval_gflops[0] << "\", \"" << interval_gflops[1] << "\"]," << std::endl; out << " \"bandwidth\": [\"" << gbs << "\", \"" << interval_gbs[0] << "\", \"" << interval_gbs[1] << "\"]"; if(!no_rawdata()) { out << ","; out << std::endl << " \"raw_legend\": \"" << item.outputs_legend << "\""; out << ","; out << std::endl << " \"raw_data\": \"" << item.outputs[0] << "\""; } } else { out << std::endl << "\"time\": [\"" << item.msec[0] << "\", \"" << item.msec[0] << "\", \"" << item.msec[0] << "\"]," << std::endl; out << "\"flops\": [\"" << item.gflops[0] << "\", \"" << item.gflops[0] << "\", \"" << item.gflops[0] << "\"]," << std::endl; out << "\"bandwidth\": [\"" << item.gbs[0] << "\", \"" << item.gbs[0] << "\", \"" << item.gbs[0] << "\"]"; if(!no_rawdata()) { out << ","; out << std::endl << " \"raw_legend\": \"" << item.outputs_legend << "\""; out << ","; out << std::endl << " \"raw_data\": "; out << "\"" << item.outputs[0] << "\""; out << "" << std::endl; } } } rocsparse_status rocsparse_bench_app::export_file() { const char* ofilename = this->m_bench_cmdlines.get_ofilename(); if(ofilename == nullptr) { std::cerr << "//" << std::endl; std::cerr << "// rocsparse_bench_app warning: no output filename has been specified," << std::endl; std::cerr << "// default output filename is 'a.json'." 
<< std::endl; std::cerr << "//" << std::endl; ofilename = "a.json"; } std::ofstream out(ofilename); int sample_argc; char* sample_argv[64]; rocsparse_status status; // // Write header. // status = define_results_json(out); if(status != rocsparse_status_success) { std::cerr << "run_cases failed at line " << __LINE__ << std::endl; return status; } // // Loop over cases. // const int nsamples = m_bench_cmdlines.get_nsamples(); if(nsamples != m_bench_timing.size()) { std::cerr << "incompatible sizes at line " << __LINE__ << " " << m_bench_cmdlines.get_nsamples() << " " << m_bench_timing.size() << std::endl; if(m_bench_timing.size() == 0) { std::cerr << "No data has been harvested from running case" << std::endl; } exit(1); } for(int isample = 0; isample < nsamples; ++isample) { this->m_bench_cmdlines.get(isample, sample_argc, sample_argv); this->define_case_json(out, isample, sample_argc, sample_argv); out << "{ "; { this->export_item(out, this->m_bench_timing[isample]); } out << " }"; this->close_case_json(out, isample, sample_argc, sample_argv); } // // Write footer. 
// status = this->close_results_json(out); if(status != rocsparse_status_success) { std::cerr << "run_cases failed at line " << __LINE__ << std::endl; return status; } out.close(); return rocsparse_status_success; } rocsparse_status rocsparse_bench_app::define_case_json(std::ostream& out, int isample, int argc, char** argv) { if(isample > 0) out << "," << std::endl; out << std::endl; out << "{ \"cmdline\": \""; out << argv[0]; for(int i = 1; i < argc; ++i) out << " " << argv[i]; out << " \"," << std::endl; out << " \"timing\": "; return rocsparse_status_success; } rocsparse_status rocsparse_bench_app::close_case_json(std::ostream& out, int isample, int argc, char** argv) { out << " }"; return rocsparse_status_success; } rocsparse_status rocsparse_bench_app::define_results_json(std::ostream& out) { out << "{" << std::endl; auto end = std::chrono::system_clock::now(); std::time_t end_time = std::chrono::system_clock::to_time_t(end); char* str = std::ctime(&end_time); for(int i = 0; i >= 0; ++i) if(str[i] == '\n') { str[i] = '\0'; break; } out << "\"date\": \"" << str << "\"," << std::endl; out << "\"rocSPARSE version\": \"" << rocsparse_get_version() << "\"," << std::endl; // // !!! To fix, not necessarily the gpu used from rocsparse_bench. // hipDeviceProp_t prop; hipGetDeviceProperties(&prop, 0); gpu_config g(prop); g.print_json(out); out << std::endl << "\"cmdline\": \"" << this->m_initial_argv[0]; for(int i = 1; i < this->m_initial_argc; ++i) { out << " " << this->m_initial_argv[i]; } out << "\"," << std::endl; int option_index_x = this->m_bench_cmdlines.get_option_index_x(); out << std::endl << "\"xargs\": \["; for(int j = 0; j < this->m_bench_cmdlines.get_option_nargs(option_index_x); ++j) { auto arg = this->m_bench_cmdlines.get_option_arg(option_index_x, j); if(j > 0) out << ", "; out << "\"" << arg << "\""; } out << "]," << std::endl; out << std::endl << "\"yargs\":"; // // Harvest expanded options. 
// std::vector y_options_size; std::vector y_options_index; for(int k = 0; k < this->m_bench_cmdlines.get_noptions(); ++k) { if(k != option_index_x) { if(this->m_bench_cmdlines.get_option_nargs(k) > 1) { y_options_index.push_back(k); y_options_size.push_back(this->m_bench_cmdlines.get_option_nargs(k)); } } } const int num_y_options = y_options_index.size(); if(num_y_options > 0) { std::vector> indices(num_y_options); for(int k = 0; k < num_y_options; ++k) { indices[k].resize(y_options_size[k], 0); } } int nplots = this->m_bench_cmdlines.get_nsamples() / this->m_bench_cmdlines.get_option_nargs(option_index_x); std::vector plot_titles(nplots); if(plot_titles.size() == 1) { plot_titles.push_back(""); } else { int n = y_options_size[0]; auto argname0 = this->m_bench_cmdlines.get_option_name(y_options_index[0]); for(int iplot = 0; iplot < nplots; ++iplot) { std::string title(""); int p = n; { int jref = iplot % p; auto arg0 = this->m_bench_cmdlines.get_option_arg(y_options_index[0], jref); if(argname0[1] == '-') { title += std::string(argname0 + 2) + std::string("=") + arg0; } else { title += std::string(argname0 + 1) + std::string("=") + arg0; } } for(int k = 1; k < num_y_options; ++k) { int kref = iplot / p; p *= this->m_bench_cmdlines.get_option_nargs(y_options_index[k]); auto arg = this->m_bench_cmdlines.get_option_arg(y_options_index[k], kref); auto argname = this->m_bench_cmdlines.get_option_name(y_options_index[k]); if(argname[1] == '-') { title += std::string(",") + std::string(argname + 2) + std::string("=") + arg; } else { title += std::string(",") + std::string(argname + 1) + std::string("=") + arg; } } plot_titles[iplot] = title; } } out << "["; { out << "\"" << plot_titles[0] << "\""; for(int iplot = 1; iplot < nplots; ++iplot) out << ", \"" << plot_titles[iplot] << "\""; } out << "]," << std::endl << std::endl; ; out << "\"" << "results" << "\": ["; return rocsparse_status_success; } rocsparse_status rocsparse_bench_app::close_results_json(std::ostream& 
out) { out << "]" << std::endl; out << "}" << std::endl; return rocsparse_status_success; } rocSPARSE-rocm-5.7.1/clients/benchmarks/rocsparse_bench_app.hpp000066400000000000000000000207731447342677400245050ustar00rootroot00000000000000/*! \file */ /* ************************************************************************ * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
* * ************************************************************************ */ #pragma once #include "rocsparse-types.h" #include "rocsparse_bench_cmdlines.hpp" #include #include struct rocsparse_benchfile_format { typedef enum value_type_ : rocsparse_int { json = 0, yaml } value_type; protected: value_type value{json}; public: inline constexpr operator value_type() const { return this->value; }; inline constexpr rocsparse_benchfile_format(){}; inline constexpr explicit rocsparse_benchfile_format(rocsparse_int ival) : value((value_type)ival) { } static constexpr value_type all[2] = {rocsparse_benchfile_format::json, rocsparse_benchfile_format::yaml}; inline bool is_invalid() const { switch(this->value) { case json: case yaml: { return false; } } return true; }; inline explicit rocsparse_benchfile_format(const char* ext) { if(!strcmp(ext, ".json")) { value = json; } else if(!strcmp(ext, ".JSON")) { value = json; } else if(!strcmp(ext, ".yaml")) { value = yaml; } else if(!strcmp(ext, ".YAML")) { value = yaml; } else value = (value_type)-1; }; inline const char* to_string() const { switch(this->value) { #define CASE(case_name) \ case case_name: \ { \ return #case_name; \ } CASE(json); CASE(yaml); #undef CASE } return "unknown"; } }; // // Struct collecting benchmark timing results. 
//
struct rocsparse_bench_timing_t
{
    //
    // Local item
    //
    // Results recorded for one sample: each array holds one slot per run.
    //
    struct item_t
    {
        // Number of runs; record() rejects any run index outside [0, m_nruns).
        int m_nruns{};
        // NOTE(review): the element types of the vectors below are missing in this
        // copy of the file; record() stores double values in msec/gflops/gbs and
        // std::string values in outputs - confirm against the original header.
        std::vector msec{};
        std::vector gflops{};
        std::vector gbs{};
        std::vector outputs{};
        // Legend stored by record_output_legend().
        std::string outputs_legend{};

        item_t(){};

        // Build an item with nruns_ slots in every array.
        explicit item_t(int nruns_)
            : m_nruns(nruns_)
            , msec(nruns_)
            , gflops(nruns_)
            , gbs(nruns_)
            , outputs(nruns_){};

        // Resize every array to nruns_ slots and return this item.
        item_t& operator()(int nruns_)
        {
            this->m_nruns = nruns_;
            this->msec.resize(nruns_);
            this->gflops.resize(nruns_);
            this->gbs.resize(nruns_);
            this->outputs.resize(nruns_);
            return *this;
        };

        // Store the three measures of run irun.
        // Returns rocsparse_status_internal_error if irun is out of range.
        rocsparse_status record(int irun, double msec_, double gflops_, double gbs_)
        {
            if(irun >= 0 && irun < m_nruns)
            {
                this->msec[irun]   = msec_;
                this->gflops[irun] = gflops_;
                this->gbs[irun]    = gbs_;
                return rocsparse_status_success;
            }
            else
            {
                return rocsparse_status_internal_error;
            }
        }

        // Store the output string of run irun.
        // Returns rocsparse_status_internal_error if irun is out of range.
        rocsparse_status record(int irun, const std::string& s)
        {
            if(irun >= 0 && irun < m_nruns)
            {
                this->outputs[irun] = s;
                return rocsparse_status_success;
            }
            else
            {
                return rocsparse_status_internal_error;
            }
        }

        // Store the legend of the outputs; always succeeds.
        rocsparse_status record_output_legend(const std::string& s)
        {
            this->outputs_legend = s;
            return rocsparse_status_success;
        }
    };

    // Number of items (one per sample).
    size_t size() const
    {
        return this->m_items.size();
    };

    // Unchecked access to the item of sample i.
    item_t& operator[](size_t i)
    {
        return this->m_items[i];
    }
    const item_t& operator[](size_t i) const
    {
        return this->m_items[i];
    }

    // Create nsamples items, each sized for nruns_per_sample runs.
    rocsparse_bench_timing_t(int nsamples, int nruns_per_sample)
        : m_items(nsamples)
    {
        for(int i = 0; i < nsamples; ++i)
        {
            m_items[i](nruns_per_sample);
        }
    }

private:
    // NOTE(review): element type missing in this copy; operator[] returns item_t& - confirm.
    std::vector m_items;
};

//
// Base of the benchmark application: holds the initial command line, the
// expanded command lines and the timing results.
//
class rocsparse_bench_app_base
{
protected:
    //
    // Record initial command line.
    //
    int    m_initial_argc{};
    char** m_initial_argv;
    //
    // Set of command lines.
    //
    rocsparse_bench_cmdlines m_bench_cmdlines;
    //
    // Timing results.
    //
    rocsparse_bench_timing_t m_bench_timing;

    bool m_stdout_disabled{true};

    // Allocate argv_[0] with new[] and copy the argc pointers of argv into it
    // (the strings themselves are not duplicated). Returns argc.
    static int save_initial_cmdline(int argc, char** argv, char*** argv_)
    {
        argv_[0] = new char*[argc];
        for(int i = 0; i < argc; ++i)
        {
            argv_[0][i] = argv[i];
        }
        return argc;
    }
    //
    // @brief Constructor.
    //
    rocsparse_bench_app_base(int argc, char** argv);
    //
    // @brief Run case.
    //
    rocsparse_status run_case(int isample, int irun, int argc, char** argv);
    //
    // For internal use, to get the current isample and irun.
    //
    int m_isample{};
    int m_irun{};
    int get_isample() const
    {
        return this->m_isample;
    };
    int get_irun() const
    {
        return this->m_irun;
    };

public:
    // Forwarded from the command lines.
    bool is_stdout_disabled() const
    {
        return m_bench_cmdlines.is_stdout_disabled();
    }
    // Forwarded from the command lines.
    bool no_rawdata() const
    {
        return m_bench_cmdlines.no_rawdata();
    }
    //
    // @brief Run cases.
    //
    rocsparse_status run_cases();
};

//
// Benchmark application, reachable through a static instance pointer.
//
class rocsparse_bench_app : public rocsparse_bench_app_base
{
private:
    static rocsparse_bench_app* s_instance;

public:
    // Create a new application with new and store it as the instance.
    // A previously stored instance is overwritten, not deleted.
    static rocsparse_bench_app* instance(int argc, char** argv)
    {
        s_instance = new rocsparse_bench_app(argc, argv);
        return s_instance;
    }
    // Return the stored instance pointer.
    static rocsparse_bench_app* instance()
    {
        return s_instance;
    }

    rocsparse_bench_app(const rocsparse_bench_app&) = delete;
    rocsparse_bench_app& operator=(const rocsparse_bench_app&) = delete;

    // True if the command line contains the option --bench-x
    // (see rocsparse_bench_cmdlines::applies).
    static bool applies(int argc, char** argv)
    {
        return rocsparse_bench_cmdlines::applies(argc, argv);
    }

    rocsparse_bench_app(int argc, char** argv);
    ~rocsparse_bench_app();
    rocsparse_status export_file();

    // Record the measures of the current run (m_irun) of the current sample (m_isample).
    rocsparse_status record_timing(double msec, double gflops, double bandwidth)
    {
        return this->m_bench_timing[this->m_isample].record(this->m_irun, msec, gflops, bandwidth);
    }
    // Record the output string of the current run of the current sample.
    rocsparse_status record_output(const std::string& s)
    {
        return this->m_bench_timing[this->m_isample].record(this->m_irun, s);
    }
    // Record the legend of the outputs of the current sample.
    rocsparse_status record_output_legend(const std::string& s)
    {
        return this->m_bench_timing[this->m_isample].record_output_legend(s);
    }

protected:
    void             export_item(std::ostream& out, rocsparse_bench_timing_t::item_t& item);
    rocsparse_status define_case_json(std::ostream& out, int isample, int argc, char** argv);
    rocsparse_status close_case_json(std::ostream& out, int isample, int argc, char** argv);
    rocsparse_status define_results_json(std::ostream& out);
    rocsparse_status close_results_json(std::ostream& out);
    // NOTE(review): the element type of v is missing in this copy - confirm.
    void confidence_interval(const double       alpha,
                             const int          resize,
                             const int          nboots,
                             const std::vector& v,
                             double             interval[2]);
};
rocSPARSE-rocm-5.7.1/clients/benchmarks/rocsparse_bench_cmdlines.cpp000066400000000000000000000056411447342677400255130ustar00rootroot00000000000000
#include "rocsparse_bench_cmdlines.hpp"

//
// The accessors below forward to the parsed command line (m_cmd).
//

//
// @brief Get the output filename.
//
const char* rocsparse_bench_cmdlines::get_ofilename() const
{
    return this->m_cmd.get_ofilename();
}

//
// @brief Get the number of samples.
//
int rocsparse_bench_cmdlines::get_nsamples() const
{
    return this->m_cmd.get_nsamples();
};

//
// @brief Get the index of the X option.
//
int rocsparse_bench_cmdlines::get_option_index_x() const
{
    return this->m_cmd.get_option_index_x();
};

//
// @brief Get the number of arguments of option i.
//
int rocsparse_bench_cmdlines::get_option_nargs(int i)
{
    return this->m_cmd.get_option_nargs(i);
}

//
// @brief Get argument j of option i.
//
const char* rocsparse_bench_cmdlines::get_option_arg(int i, int j)
{
    return this->m_cmd.get_option_arg(i, j);
}

//
// @brief Get the name of option i.
//
const char* rocsparse_bench_cmdlines::get_option_name(int i)
{
    return this->m_cmd.get_option_name(i);
}

//
// @brief Get the number of arguments of the X option.
//
int rocsparse_bench_cmdlines::get_noptions_x() const
{
    return this->m_cmd.get_noptions_x();
};

//
// @brief Get the number of options.
//
int rocsparse_bench_cmdlines::get_noptions() const
{
    return this->m_cmd.get_noptions();
};

bool rocsparse_bench_cmdlines::is_stdout_disabled() const
{
    return this->m_cmd.is_stdout_disabled();
};

bool rocsparse_bench_cmdlines::no_rawdata() const
{
    return this->m_cmd.no_rawdata();
};

//
// @brief Get the number of runs per sample.
//
int rocsparse_bench_cmdlines::get_nruns() const
{
    return this->m_cmd.get_nruns();
};

//
// @brief Copy the command line arguments corresponding to a given sample.
// void rocsparse_bench_cmdlines::get(int isample, int& argc, char** argv) const { const auto& cmdsample = this->m_cmdset[isample]; for(int j = 0; j < cmdsample.argc; ++j) { argv[j] = cmdsample.argv[j]; } argc = cmdsample.argc; } void rocsparse_bench_cmdlines::get_argc(int isample, int& argc_) const { argc_ = this->m_cmdset[isample].argc; } rocsparse_bench_cmdlines::~rocsparse_bench_cmdlines() { if(this->m_cmdset != nullptr) { delete[] this->m_cmdset; this->m_cmdset = nullptr; } } // // @brief Constructor. // rocsparse_bench_cmdlines::rocsparse_bench_cmdlines(int argc, char** argv) : m_cmd(argc, argv) { // // Expand the command line . // this->m_cmdset = new val[this->m_cmd.get_nsamples()]; this->m_cmd.expand(this->m_cmdset); } bool rocsparse_bench_cmdlines::applies(int argc, char** argv) { for(int i = 1; i < argc; ++i) { if(!strcmp(argv[i], "--bench-x")) { return true; } } return false; } void rocsparse_bench_cmdlines::info() const { int nsamples = this->m_cmd.get_nsamples(); for(int isample = 0; isample < nsamples; ++isample) { const auto& cmdsample = this->m_cmdset[isample]; const auto argc = cmdsample.argc; const auto argv = cmdsample.argv; std::cout << "sample[" << isample << "/" << nsamples << "], argc = " << argc << std::endl; for(int jarg = 0; jarg < argc; ++jarg) { std::cout << " " << argv[jarg]; } std::cout << std::endl; } } rocSPARSE-rocm-5.7.1/clients/benchmarks/rocsparse_bench_cmdlines.hpp000066400000000000000000000477131447342677400255260ustar00rootroot00000000000000/*! \file */ /* ************************************************************************ * Copyright (C) 2021-2023 Advanced Micro Devices, Inc. All rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * * ************************************************************************ */ #pragma once #include "rocsparse_clients_envariables.hpp" #include #include #include #include // // @brief The role of this class is to expand a command line into multiple command lines. // @details // // What is expanding a command line into multiple command lines? // Let's consider the following command line './foo -m 10' where option '-m' of ./foo // takes only one argument, here 10. // // An expansion mechanism is implemented in this class to provide the set of command lines // './foo -m 10', // './foo -m 2', // './foo -m 7', // './foo -m -4' // // from: ./foo -m 10 2 7 -4 // // It allows to generate a set of command lines to be useful in a benchmarking context. // // Rules: // - any keyword starting with '-' is considered as an option. 
// - each option having exactly one argument is subject to a possible expansion, there is no limit on the number of options to expand. // // // Number of command lines generated : product of all the options' number (>=1) of arguments // examples: // cmd: './foo -m 10 2 7 -k 32 -l f -v' gives // './foo -m 10 -k 32 -l f' // './foo -m 2 -k 32 -l f' // './foo -m 7 -k 32 -l f' // num cmds: max(1,3) * max(1,1) * max(1,0) = 3 // cmd: './foo -m 10 2 7 -k 32 64 -l f g' gives // './foo -m 10 -k 32 -l f' // './foo -m 2 -k 32 -l f' // './foo -m 7 -k 32 -l f' // './foo -m 10 -k 64 -l f' // './foo -m 2 -k 64 -l f' // './foo -m 7 -k 64 -l f' // './foo -m 10 -k 32 -l g' // './foo -m 2 -k 32 -l g' // './foo -m 7 -k 32 -l g' // './foo -m 10 -k 64 -l g' // './foo -m 2 -k 64 -l g' // './foo -m 7 -k 64 -l g' // num cmds: max(1,3) * max(1,2) * max(1,2) = 12 // // Specific options: // // option: --bench-x, to precede the option the user want to be the first one. // example // cmd: './foo -m 10 2 7 --bench-x -k 32 64 -l f g' gives // './foo -m 32 -k 10 -l f' // './foo -m 64 -k 10 -l f' // './foo -m 32 -k 2 -l f' // './foo -m 64 -k 2 -l f' // './foo -m 32 -k 7 -l f' // './foo -m 64 -k 7 -l f' // './foo -m 32 -k 10 -l g' // './foo -m 64 -k 10 -l g' // './foo -m 32 -k 2 -l g' // './foo -m 64 -k 2 -l g' // './foo -m 32 -k 7 -l g' // './foo -m 64 -k 7 -l g' // // option: --bench-o, output filename. // option: --bench-n, number of runs. // option: --bench-std, prevent from standard output to be disabled. // class rocsparse_bench_cmdlines { private: struct val { // // Everything is public. 
// public: int argc{}; char** argv{}; val& operator=(const val&) = delete; ~val() { if(this->argv != nullptr) { delete[] this->argv; this->argv = nullptr; } } val(){}; val(const val& v) = delete; explicit val(int n) : argc(n) { this->argv = new char*[this->argc]; } val& operator()(int n) { this->argc = n; if(this->argv) { delete[] this->argv; } this->argv = new char*[this->argc]; return *this; } }; struct cmdline { public: // // @brief Return the output filename. // const char* get_ofilename() const { return this->m_ofilename; }; // // @brief Return the number of plots. // int get_nplots() const { return this->get_nsamples() / this->m_options[this->m_option_index_x].args.size(); }; int get_noptions_x() const { return this->m_options[this->m_option_index_x].args.size(); }; int get_noptions() const { return this->m_options.size(); }; int get_option_nargs(int i) { return this->m_options[i].args.size(); } const char* get_option_arg(int i, int j) { return this->m_options[i].args[j].name; } const char* get_option_name(int i) { return this->m_options[i].name; } int get_nsamples() const { return this->m_nsamples; } int get_option_index_x() const { return this->m_option_index_x; } int get_nruns() const { return this->m_bench_nruns; } bool is_stdout_disabled() const { return this->m_is_stdout_disabled; } bool no_rawdata() const { return this->m_no_rawdata; } // // Constructor. // cmdline(int argc, char** argv) { // // Any option --bench-? // // // Try to get the option --bench-n. // int detected_option_bench_n = detect_option(argc, argv, "--bench-n", this->m_bench_nruns); if(detected_option_bench_n == -1) { std::cerr << "missing parameter ?" << std::endl; exit(1); } // // Try to get the option --bench-o. // int detected_option_bench_o = detect_option_string(argc, argv, "--bench-o", this->m_ofilename); if(detected_option_bench_o == -1) { std::cerr << "missing parameter ?" << std::endl; exit(1); } // // Try to get the option --bench-x. 
// const char* option_x = nullptr; int detected_option_bench_x = detect_option_string(argc, argv, "--bench-x", option_x); if(detected_option_bench_x == -1 || false == is_option(option_x)) { std::cerr << "wrong position of option --bench-x ?" << std::endl; exit(1); } this->m_name = argv[0]; this->m_has_bench_option = (detected_option_bench_x || detected_option_bench_o || detected_option_bench_n); this->m_no_rawdata = detect_flag(argc, argv, "--bench-no-rawdata"); this->m_is_stdout_disabled = (false == detect_flag(argc, argv, "--bench-std")); int jarg = -1; for(int iarg = 1; iarg < argc; ++iarg) { if(argv[iarg] == option_x) { jarg = iarg; break; } } int iarg = 1; while(iarg < argc) { // // Any argument starting with the character '-' is considered as an option. // if(is_option(argv[iarg])) { if(!strcmp(argv[iarg], "--bench-std")) { ++iarg; } else if(!strcmp(argv[iarg], "--bench-o")) { iarg += 2; } else if(!strcmp(argv[iarg], "--bench-x")) { ++iarg; } else if(!strcmp(argv[iarg], "--bench-n")) { iarg += 2; } else { // // Create the option. // cmdline_option option(argv[iarg]); // // Calculate the number of arguments based on the position of the next option, if any. // const int option_nargs = count_option_nargs(iarg, argc, argv); const int next_option_index = iarg + 1 + option_nargs; for(int k = iarg + 1; k < next_option_index; ++k) { option.args.push_back(cmdline_arg(argv[k])); } // // If this option has been flagged being the 'X' field. // otherwise, other ('Y') options will be classified from the order of their appearances as Y1, Y2, Y3. // if(jarg == iarg) // { this->m_option_index_x = this->m_options.size(); } // // Insert the option created. // this->m_options.push_back(option); iarg = next_option_index; } } else { // // Regular argument. 
// this->m_args.push_back(cmdline_arg(argv[iarg])); ++iarg; } } this->m_nsamples = 1; for(size_t ioption = 0; ioption < this->m_options.size(); ++ioption) { size_t n = this->m_options[ioption].args.size(); this->m_nsamples *= std::max(n, static_cast(1)); } } void expand(val* p) { const auto num_options = this->m_options.size(); const auto num_samples = this->m_nsamples; for(int i = 0; i < num_samples; ++i) { p[i](1 + this->m_args.size() + num_options * 2); p[i].argc = 0; } // // Program name. // for(int i = 0; i < num_samples; ++i) { p[i].argv[p[i].argc++] = this->m_name; } // // Arguments without options // for(auto& arg : this->m_args) { for(int i = 0; i < num_samples; ++i) p[i].argv[p[i].argc++] = arg.name; } const int option_x_nargs = this->m_options[this->m_option_index_x].args.size(); int N = option_x_nargs; for(int iopt = 0; iopt < num_options; ++iopt) { cmdline_option& option = this->m_options[iopt]; // // // for(int isample = 0; isample < num_samples; ++isample) { p[isample].argv[p[isample].argc++] = option.name; } if(iopt == this->m_option_index_x) { // // // { const int ngroups = num_samples / option_x_nargs; for(int jgroup = 0; jgroup < ngroups; ++jgroup) { for(int ix = 0; ix < option_x_nargs; ++ix) { const int flat_index = jgroup * option_x_nargs + ix; p[flat_index].argv[p[flat_index].argc++] = option.args[ix].name; } } } // // // for(int isample = 0; isample < num_samples; ++isample) { if(p[isample].argc != p[0].argc) { std::cerr << "invalid struct line " << __LINE__ << std::endl; } } } else { const int option_narg = option.args.size(); if(option_narg > 1) { const int ngroups = num_samples / (N * option_narg); for(int jgroup = 0; jgroup < ngroups; ++jgroup) { for(int option_iarg = 0; option_iarg < option_narg; ++option_iarg) { for(int i = 0; i < N; ++i) { const int flat_index = N * (jgroup * option_narg + option_iarg) + i; p[flat_index].argv[p[flat_index].argc++] = option.args[option_iarg].name; } } } N *= std::max(option_narg, 1); } else { 
if(option_narg == 1) { for(int isample = 0; isample < num_samples; ++isample) { p[isample].argv[p[isample].argc++] = option.args[0].name; } } } } } } private: static inline int count_option_nargs(int iarg, int argc, char** argv) { int c = 0; for(int j = iarg + 1; j < argc; ++j) { if(is_option(argv[j])) { return c; } ++c; } return c; } static bool detect_flag(int argc, char** argv, const char* option_name) { for(int iarg = 1; iarg < argc; ++iarg) { if(!strcmp(argv[iarg], option_name)) { return true; } } return false; } template static int detect_option(int argc, char** argv, const char* option_name, T& value) { for(int iarg = 1; iarg < argc; ++iarg) { if(!strcmp(argv[iarg], option_name)) { ++iarg; if(iarg < argc) { std::istringstream iss(argv[iarg]); iss >> value; return 1; } else { std::cerr << "missing value for option --bench-n " << std::endl; return -1; } } } return 0; } static int detect_option_string(int argc, char** argv, const char* option_name, const char*& value) { for(int iarg = 1; iarg < argc; ++iarg) { if(!strcmp(argv[iarg], option_name)) { ++iarg; if(iarg < argc) { value = argv[iarg]; return 1; } else { std::cerr << "missing value for option " << option_name << std::endl; return -1; } } } return 0; } // // argument name. // struct cmdline_arg { char* name{}; explicit cmdline_arg(char* name_) : name(name_){}; }; // // argument option. // struct cmdline_option { char* name{}; std::vector args{}; explicit cmdline_option(char* name_) : name(name_){}; }; static inline bool is_option(const char* arg) { return arg[0] == '-'; } // // Name. // char* m_name; // // set of options. // std::vector m_options; // // set of arguments. 
// std::vector m_args; bool m_has_bench_option{}; int m_bench_nruns{1}; int m_option_index_x; int m_nsamples; bool m_is_stdout_disabled{true}; bool m_no_rawdata{}; const char* m_ofilename{}; }; private: cmdline m_cmd; val* m_cmdset{}; public: static void help(std::ostream& out) { out << "" << std::endl; out << "Specific environment variables:" << std::endl; for(const auto v : rocsparse_clients_envariables::s_var_bool_all) { out << rocsparse_clients_envariables::get_name(v) << " " << rocsparse_clients_envariables::get_description(v) << std::endl; } for(const auto v : rocsparse_clients_envariables::s_var_string_all) { out << rocsparse_clients_envariables::get_name(v) << " " << rocsparse_clients_envariables::get_description(v) << std::endl; } out << "" << std::endl; out << "Benchmarks options:" << std::endl; out << "--bench-x flag to preceed the main option " << std::endl; out << "--bench-o output JSON file, (default = a.json)" << std::endl; out << "--bench-n number of runs, (default = 1)" << std::endl; out << "--bench-no-rawdata do not export raw data." << std::endl; out << "" << std::endl; out << "Example:" << std::endl; out << "rocsparse-bench -f csrmv --bench-x -M 10 20 30 40" << std::endl; } // // @brief Get the output filename. // const char* get_ofilename() const; // // @brief Get the number of samples.. // int get_nsamples() const; int get_option_index_x() const; int get_option_nargs(int i); const char* get_option_arg(int i, int j); const char* get_option_name(int i); int get_noptions_x() const; int get_noptions() const; bool is_stdout_disabled() const; bool no_rawdata() const; // // @brief Get the number of runs per sample. // int get_nruns() const; void get(int isample, int& argc, char** argv) const; void get_argc(int isample, int& argc_) const; rocsparse_bench_cmdlines& operator=(const rocsparse_bench_cmdlines&) = delete; // // @brief Constructor. 
// rocsparse_bench_cmdlines(int argc, char** argv); rocsparse_bench_cmdlines(const rocsparse_bench_cmdlines&) = delete; virtual ~rocsparse_bench_cmdlines(); static bool applies(int argc, char** argv); // // @brief Some info. // void info() const; }; rocSPARSE-rocm-5.7.1/clients/benchmarks/rocsparse_routine.cpp000066400000000000000000000520661447342677400242460ustar00rootroot00000000000000/*! \file */ /* ************************************************************************ * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
*
* ************************************************************************ */
#include "rocsparse_routine.hpp"
#include "rocsparse.h"

//
// Build a routine from its name: the name is compared with the name of every
// routine of all_routines. If none matches, the list of valid names is printed
// on std::cerr and rocsparse_status_invalid_value is thrown.
//
rocsparse_routine::rocsparse_routine(const char* function)
{
    for(auto routine : all_routines)
    {
        const char* str = s_routine_names[routine];
        if(!strcmp(function, str))
        {
            this->value = routine;
            return;
        }
    }

    std::cerr << "// function " << function << " is invalid, list of valid function is"
              << std::endl;
    for(auto routine : all_routines)
    {
        const char* str = s_routine_names[routine];
        std::cerr << "//    - " << str << std::endl;
    }

    throw rocsparse_status_invalid_value;
}

//
// Default constructor: the value is set to -1, which is not a routine.
//
rocsparse_routine::rocsparse_routine()
    : value((value_type)-1){};

//
// Assign the routine from its name and return this object; same lookup and
// same failure behavior as the constructor taking a name.
//
rocsparse_routine& rocsparse_routine::operator()(const char* function)
{
    for(auto routine : all_routines)
    {
        const char* str = s_routine_names[routine];
        if(!strcmp(function, str))
        {
            this->value = routine;
            return *this;
        }
    }

    std::cerr << "// function " << function << " is invalid, list of valid function is"
              << std::endl;
    for(auto routine : all_routines)
    {
        const char* str = s_routine_names[routine];
        std::cerr << "//    - " << str << std::endl;
    }

    throw rocsparse_status_invalid_value;
}

//
// Out-of-class definition of the static constexpr array.
//
constexpr rocsparse_routine::value_type rocsparse_routine::all_routines[];

//
// Select the index type from a character and forward to dispatch_call:
// 's' selects rocsparse_indextype_i32, 'd' selects rocsparse_indextype_i64,
// 'm' selects rocsparse_indextype_i64 with the 'mixed' flag set; any other
// character returns rocsparse_status_invalid_value.
// NOTE(review): the template parameter list and the template arguments of the
// dispatch_call calls are missing in this copy of the file - restore them from
// the original source.
//
template
rocsparse_status rocsparse_routine::dispatch_indextype(const char cindextype, const Arguments& arg)
{
    const rocsparse_indextype indextype = (cindextype == 'm')   ? rocsparse_indextype_i64
                                          : (cindextype == 's') ? rocsparse_indextype_i32
                                          : (cindextype == 'd') ? rocsparse_indextype_i64
                                                                : ((rocsparse_indextype)-1);
    const bool                mixed     = (cindextype == 'm');
    switch(indextype)
    {
    case rocsparse_indextype_u16:
    {
        break;
    }
    case rocsparse_indextype_i32:
    {
        return dispatch_call(arg);
    }
    case rocsparse_indextype_i64:
    {
        if(mixed)
        {
            return dispatch_call(arg);
        }
        else
        {
            return dispatch_call(arg);
        }
    }
    }
    return rocsparse_status_invalid_value;
}

//
// Select the data type from a character and forward to dispatch_indextype:
// 's' -> f32_r, 'd' -> f64_r, 'c' -> f32_c, 'z' -> f64_c; any other character,
// as well as the integer data types, returns rocsparse_status_invalid_value.
// NOTE(review): the template parameter list and the template arguments of the
// dispatch_indextype calls are missing in this copy of the file - restore them
// from the original source.
//
template
rocsparse_status rocsparse_routine::dispatch_precision(const char       precision,
                                                       const char       indextype,
                                                       const Arguments& arg)
{
    const rocsparse_datatype datatype = (precision == 's')   ? rocsparse_datatype_f32_r
                                        : (precision == 'd') ? rocsparse_datatype_f64_r
                                        : (precision == 'c') ? rocsparse_datatype_f32_c
                                        : (precision == 'z') ? rocsparse_datatype_f64_c
                                                             : ((rocsparse_datatype)-1);
    switch(datatype)
    {
    case rocsparse_datatype_f32_r:
        return dispatch_indextype(indextype, arg);
    case rocsparse_datatype_f64_r:
        return dispatch_indextype(indextype, arg);
    case rocsparse_datatype_f32_c:
        return dispatch_indextype(indextype, arg);
    case rocsparse_datatype_f64_c:
        return dispatch_indextype(indextype, arg);
    case rocsparse_datatype_i8_r:
    case rocsparse_datatype_u8_r:
    case rocsparse_datatype_i32_r:
    case rocsparse_datatype_u32_r:
        return rocsparse_status_invalid_value;
    }
    return rocsparse_status_invalid_value;
}

//
// Dispatch on the routine held by this object: ROCSPARSE_FOREACH_ROUTINE
// generates one case per routine, each forwarding to dispatch_precision.
// Returns rocsparse_status_invalid_value if the value matches no case.
//
rocsparse_status rocsparse_routine::dispatch(const char       precision,
                                             const char       indextype,
                                             const Arguments& arg) const
{
    switch(this->value)
    {
#define ROCSPARSE_DO_ROUTINE(FNAME) \
    case FNAME:                     \
        return dispatch_precision(precision, indextype, arg);

        ROCSPARSE_FOREACH_ROUTINE;
#undef ROCSPARSE_DO_ROUTINE
    }
    return rocsparse_status_invalid_value;
}

//
// Name of the routine. Returns nullptr when the enumerator name differs from
// the corresponding entry of s_routine_names.
//
constexpr const char* rocsparse_routine::to_string() const
{
    //
    // switch for checking inconsistency.
    //
    switch(this->value)
    {
#define ROCSPARSE_DO_ROUTINE(x_)                      \
    case x_:                                          \
    {                                                 \
        if(strcmp(#x_, s_routine_names[this->value])) \
            return nullptr;                           \
        break;                                        \
    }

        ROCSPARSE_FOREACH_ROUTINE;
    }

#undef ROCSPARSE_DO_ROUTINE
    return s_routine_names[this->value];
}

// Level1
#include "testing_axpyi.hpp"
#include "testing_dotci.hpp"
#include "testing_doti.hpp"
#include "testing_gthr.hpp"
#include "testing_gthrz.hpp"
#include "testing_roti.hpp"
#include "testing_sctr.hpp"

// Level2
#include "testing_bsrmv.hpp"
#include "testing_bsrsv.hpp"
#include "testing_bsrxmv.hpp"
#include "testing_csritsv.hpp"
#include "testing_csrmv_managed.hpp"
#include "testing_csrsv.hpp"
#include "testing_gebsrmv.hpp"
#include "testing_gemvi.hpp"
#include "testing_hybmv.hpp"
#include "testing_spitsv_csr.hpp"
#include "testing_spmv_bsr.hpp"
#include "testing_spmv_coo.hpp"
#include "testing_spmv_coo_aos.hpp"
#include "testing_spmv_csc.hpp"
#include "testing_spmv_csr.hpp"
#include "testing_spmv_ell.hpp"
#include "testing_spsv_coo.hpp"
#include "testing_spsv_csr.hpp"

// Level3
#include "testing_bsrmm.hpp"
#include "testing_bsrsm.hpp"
#include "testing_csrmm.hpp"
#include "testing_csrsm.hpp"
#include "testing_gebsrmm.hpp"
#include "testing_gemmi.hpp"
#include "testing_sddmm.hpp"
#include "testing_spmm_batched_bell.hpp"
#include "testing_spmm_batched_coo.hpp"
#include "testing_spmm_batched_csc.hpp"
#include "testing_spmm_batched_csr.hpp"
#include "testing_spmm_bell.hpp"
#include "testing_spmm_coo.hpp"
#include "testing_spmm_csc.hpp"
#include "testing_spmm_csr.hpp"
#include "testing_spsm_coo.hpp"
#include "testing_spsm_csr.hpp"

// Extra
#include "testing_bsrgeam.hpp"
#include "testing_bsrgemm.hpp"
#include "testing_csrgeam.hpp"
#include "testing_csrgemm.hpp"
#include "testing_csrgemm_reuse.hpp"
#include "testing_spgemm_bsr.hpp"
#include "testing_spgemm_csr.hpp"

// Preconditioner
#include "testing_bsric0.hpp"
#include "testing_bsrilu0.hpp"
#include "testing_csric0.hpp"
#include "testing_csrilu0.hpp"
#include
"testing_csritilu0.hpp" #include "testing_gpsv_interleaved_batch.hpp" #include "testing_gtsv.hpp" #include "testing_gtsv_interleaved_batch.hpp" #include "testing_gtsv_no_pivot.hpp" #include "testing_gtsv_no_pivot_strided_batch.hpp" // Conversion #include "testing_bsr2csr.hpp" #include "testing_coo2csr.hpp" #include "testing_coo2dense.hpp" #include "testing_coosort.hpp" #include "testing_csc2dense.hpp" #include "testing_cscsort.hpp" #include "testing_csr2bsr.hpp" #include "testing_csr2coo.hpp" #include "testing_csr2csc.hpp" #include "testing_csr2csr_compress.hpp" #include "testing_csr2dense.hpp" #include "testing_csr2ell.hpp" #include "testing_csr2gebsr.hpp" #include "testing_csr2hyb.hpp" #include "testing_csrsort.hpp" #include "testing_dense2coo.hpp" #include "testing_dense2csc.hpp" #include "testing_dense2csr.hpp" #include "testing_dense_to_sparse_coo.hpp" #include "testing_dense_to_sparse_csc.hpp" #include "testing_dense_to_sparse_csr.hpp" #include "testing_ell2csr.hpp" #include "testing_gebsr2csr.hpp" #include "testing_gebsr2gebsc.hpp" #include "testing_gebsr2gebsr.hpp" #include "testing_hyb2csr.hpp" #include "testing_identity.hpp" #include "testing_nnz.hpp" #include "testing_prune_csr2csr.hpp" #include "testing_prune_csr2csr_by_percentage.hpp" #include "testing_prune_dense2csr.hpp" #include "testing_prune_dense2csr_by_percentage.hpp" #include "testing_sparse_to_dense_coo.hpp" #include "testing_sparse_to_dense_csc.hpp" #include "testing_sparse_to_dense_csr.hpp" // Reordering #include "testing_csrcolor.hpp" // Util #include "testing_check_matrix_coo.hpp" #include "testing_check_matrix_csc.hpp" #include "testing_check_matrix_csr.hpp" #include "testing_check_matrix_ell.hpp" #include "testing_check_matrix_gebsc.hpp" #include "testing_check_matrix_gebsr.hpp" #include "testing_check_matrix_hyb.hpp" template rocsparse_status rocsparse_routine::dispatch_call(const Arguments& arg) { #define DEFINE_CASE_IT_X(value, testingf) \ case value: \ { \ try \ { \ testingf(arg); \ 
return rocsparse_status_success; \ } \ catch(const rocsparse_status& status) \ { \ return status; \ } \ } #define DEFINE_CASE_IJT_X(value, testingf) \ case value: \ { \ try \ { \ testingf(arg); \ return rocsparse_status_success; \ } \ catch(const rocsparse_status& status) \ { \ return status; \ } \ } #define DEFINE_CASE_IAXYT_X(value, testingf) \ case value: \ { \ try \ { \ testingf(arg); \ return rocsparse_status_success; \ } \ catch(const rocsparse_status& status) \ { \ return status; \ } \ } #define DEFINE_CASE_IJAXYT_X(value, testingf) \ case value: \ { \ try \ { \ testingf(arg); \ return rocsparse_status_success; \ } \ catch(const rocsparse_status& status) \ { \ return status; \ } \ } #define DEFINE_CASE_IT(value) DEFINE_CASE_IT_X(value, testing_##value) #define DEFINE_CASE_IJT(value) DEFINE_CASE_IJT_X(value, testing_##value) #define DEFINE_CASE_IAXYT(value) DEFINE_CASE_IAXYT_X(value, testing_##value) #define DEFINE_CASE_IJAXYT(value) DEFINE_CASE_IJAXYT_X(value, testing_##value) #define IS_T_REAL (std::is_same() || std::is_same()) #define IS_T_COMPLEX \ (std::is_same() || std::is_same()) #define DEFINE_CASE_T_REAL_ONLY(value) \ case value: \ { \ if(IS_T_REAL) \ { \ try \ { \ testing_##value(arg); \ return rocsparse_status_success; \ } \ catch(const rocsparse_status& status) \ { \ return status; \ } \ } \ else \ { \ return rocsparse_status_not_implemented; \ } \ } #define DEFINE_CASE_T_FLOAT_ONLY(value) \ case value: \ { \ if(std::is_same()) \ { \ try \ { \ testing_##value(arg); \ return rocsparse_status_success; \ } \ catch(const rocsparse_status& status) \ { \ return status; \ } \ } \ else \ { \ return rocsparse_status_not_implemented; \ } \ } #define DEFINE_CASE_T_X(value, testingf) \ case value: \ { \ try \ { \ testingf(arg); \ return rocsparse_status_success; \ } \ catch(const rocsparse_status& status) \ { \ return status; \ } \ } #define DEFINE_CASE_T(value) DEFINE_CASE_T_X(value, testing_##value) #define DEFINE_CASE_T_REAL_VS_COMPLEX(value, rtestingf, 
ctestingf) \ case value: \ { \ try \ { \ if(IS_T_REAL) \ { \ rtestingf(arg); \ } \ else if(IS_T_COMPLEX) \ { \ ctestingf(arg); \ } \ else \ { \ return rocsparse_status_internal_error; \ } \ } \ catch(const rocsparse_status& status) \ { \ return status; \ } \ } switch(FNAME) { DEFINE_CASE_T(axpyi); DEFINE_CASE_IT_X(bellmm, testing_spmm_bell); DEFINE_CASE_IT_X(bellmm_batched, testing_spmm_batched_bell); DEFINE_CASE_T(bsrgeam); DEFINE_CASE_T(bsric0); DEFINE_CASE_T(bsrilu0); DEFINE_CASE_T(bsrmm); DEFINE_CASE_T(bsrsm); DEFINE_CASE_T(bsrsv); DEFINE_CASE_T(bsrxmv); DEFINE_CASE_T(bsr2csr); DEFINE_CASE_T(check_matrix_csr); DEFINE_CASE_T(check_matrix_csc); DEFINE_CASE_T(check_matrix_coo); DEFINE_CASE_T(check_matrix_gebsr); DEFINE_CASE_T(check_matrix_gebsc); DEFINE_CASE_T(check_matrix_ell); DEFINE_CASE_T(check_matrix_hyb); DEFINE_CASE_IT_X(coomm, testing_spmm_coo); DEFINE_CASE_IT_X(coomm_batched, testing_spmm_batched_coo); DEFINE_CASE_IAXYT_X(coomv, testing_spmv_coo); DEFINE_CASE_T_FLOAT_ONLY(coosort); DEFINE_CASE_IT_X(coosv, testing_spsv_coo); DEFINE_CASE_IAXYT_X(coomv_aos, testing_spmv_coo_aos); DEFINE_CASE_IT_X(coosm, testing_spsm_coo); DEFINE_CASE_T_FLOAT_ONLY(coo2csr); DEFINE_CASE_T(coo2dense); DEFINE_CASE_T_FLOAT_ONLY(cscsort); DEFINE_CASE_T(csc2dense); DEFINE_CASE_T(csrcolor); DEFINE_CASE_T(csric0); DEFINE_CASE_T(csrilu0); DEFINE_CASE_T(csritilu0); DEFINE_CASE_T(csrgeam); DEFINE_CASE_IJT_X(bsrgemm, testing_spgemm_bsr); DEFINE_CASE_IJT_X(csrgemm, testing_spgemm_csr); DEFINE_CASE_T(csrgemm_reuse); DEFINE_CASE_IJAXYT_X(bsrmv, testing_spmv_bsr); DEFINE_CASE_IJAXYT_X(csrmv, testing_spmv_csr); DEFINE_CASE_T(csrmv_managed); DEFINE_CASE_IJAXYT_X(cscmv, testing_spmv_csc); DEFINE_CASE_IJT_X(csrmm, testing_spmm_csr); DEFINE_CASE_IJT_X(csrmm_batched, testing_spmm_batched_csr); DEFINE_CASE_IJT_X(cscmm, testing_spmm_csc); DEFINE_CASE_IJT_X(cscmm_batched, testing_spmm_batched_csc); DEFINE_CASE_IJT_X(csrsm, testing_spsm_csr); DEFINE_CASE_T_FLOAT_ONLY(csrsort); DEFINE_CASE_IJT_X(csrsv, 
testing_spsv_csr); DEFINE_CASE_IJT_X(spitsv_csr, testing_spitsv_csr); DEFINE_CASE_T(csritsv); DEFINE_CASE_T(csr2dense); DEFINE_CASE_T(csr2bsr); DEFINE_CASE_T_FLOAT_ONLY(csr2coo); DEFINE_CASE_T(csr2csc); DEFINE_CASE_T(csr2csr_compress); DEFINE_CASE_T(csr2ell); DEFINE_CASE_T(csr2gebsr); DEFINE_CASE_T(csr2hyb); DEFINE_CASE_T(dense2coo); DEFINE_CASE_T(dense2csc); DEFINE_CASE_T(dense2csr); DEFINE_CASE_IT(dense_to_sparse_coo); DEFINE_CASE_IJT(dense_to_sparse_csc); DEFINE_CASE_IJT(dense_to_sparse_csr); DEFINE_CASE_T(doti); DEFINE_CASE_T_REAL_VS_COMPLEX(dotci, testing_doti, testing_dotci); DEFINE_CASE_IAXYT_X(ellmv, testing_spmv_ell); DEFINE_CASE_T(ell2csr); DEFINE_CASE_T(gebsr2csr); DEFINE_CASE_T(gebsr2gebsr); DEFINE_CASE_T(gthr); DEFINE_CASE_T(gthrz); DEFINE_CASE_T(gebsr2gebsc); DEFINE_CASE_T(gebsrmv); DEFINE_CASE_T(gebsrmm); DEFINE_CASE_T(gemmi); DEFINE_CASE_T(gemvi); DEFINE_CASE_T(gtsv); DEFINE_CASE_T(gtsv_no_pivot); DEFINE_CASE_T(gtsv_no_pivot_strided_batch); DEFINE_CASE_T(gtsv_interleaved_batch); DEFINE_CASE_T(gpsv_interleaved_batch); DEFINE_CASE_T(hybmv); DEFINE_CASE_T(hyb2csr); DEFINE_CASE_T_FLOAT_ONLY(identity); DEFINE_CASE_T(nnz); DEFINE_CASE_T_REAL_ONLY(prune_csr2csr); DEFINE_CASE_T_REAL_ONLY(prune_csr2csr_by_percentage); DEFINE_CASE_T_REAL_ONLY(prune_dense2csr); DEFINE_CASE_T_REAL_ONLY(prune_dense2csr_by_percentage); DEFINE_CASE_T_REAL_ONLY(roti); DEFINE_CASE_T(sctr); DEFINE_CASE_IJT(sddmm); DEFINE_CASE_IT(sparse_to_dense_coo); DEFINE_CASE_IJT(sparse_to_dense_csc); DEFINE_CASE_IJT(sparse_to_dense_csr); } #undef DEFINE_CASE_IT_X #undef DEFINE_CASE_IJT_X #undef DEFINE_CASE_T_REAL_ONLY #undef DEFINE_CASE_T_FLOAT_ONLY #undef DEFINE_CASE_T_X #undef DEFINE_CASE_T #undef DEFINE_CASE_T_REAL_VS_COMPLEX #undef IS_T_REAL #undef IS_T_COMPLEX return rocsparse_status_invalid_value; } rocSPARSE-rocm-5.7.1/clients/benchmarks/rocsparse_routine.hpp000066400000000000000000000146551447342677400242550ustar00rootroot00000000000000/*! 
\file */ /* ************************************************************************ * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
* * ************************************************************************ */ #pragma once #include "rocsparse_arguments.hpp" // clang-format off #define ROCSPARSE_FOREACH_ROUTINE \ ROCSPARSE_DO_ROUTINE(axpyi) \ ROCSPARSE_DO_ROUTINE(bellmm) \ ROCSPARSE_DO_ROUTINE(bellmm_batched) \ ROCSPARSE_DO_ROUTINE(bsrgeam) \ ROCSPARSE_DO_ROUTINE(bsric0) \ ROCSPARSE_DO_ROUTINE(bsrilu0) \ ROCSPARSE_DO_ROUTINE(bsrgemm) \ ROCSPARSE_DO_ROUTINE(bsrmm) \ ROCSPARSE_DO_ROUTINE(bsrmv) \ ROCSPARSE_DO_ROUTINE(bsrsm) \ ROCSPARSE_DO_ROUTINE(bsrsv) \ ROCSPARSE_DO_ROUTINE(bsrxmv) \ ROCSPARSE_DO_ROUTINE(bsr2csr) \ ROCSPARSE_DO_ROUTINE(check_matrix_csr) \ ROCSPARSE_DO_ROUTINE(check_matrix_csc) \ ROCSPARSE_DO_ROUTINE(check_matrix_coo) \ ROCSPARSE_DO_ROUTINE(check_matrix_gebsr) \ ROCSPARSE_DO_ROUTINE(check_matrix_gebsc) \ ROCSPARSE_DO_ROUTINE(check_matrix_ell) \ ROCSPARSE_DO_ROUTINE(check_matrix_hyb) \ ROCSPARSE_DO_ROUTINE(coomm) \ ROCSPARSE_DO_ROUTINE(coomm_batched) \ ROCSPARSE_DO_ROUTINE(coomv) \ ROCSPARSE_DO_ROUTINE(coosort) \ ROCSPARSE_DO_ROUTINE(coosv) \ ROCSPARSE_DO_ROUTINE(coomv_aos) \ ROCSPARSE_DO_ROUTINE(coosm) \ ROCSPARSE_DO_ROUTINE(coo2csr) \ ROCSPARSE_DO_ROUTINE(coo2dense) \ ROCSPARSE_DO_ROUTINE(cscsort) \ ROCSPARSE_DO_ROUTINE(csc2dense) \ ROCSPARSE_DO_ROUTINE(csrcolor) \ ROCSPARSE_DO_ROUTINE(csric0) \ ROCSPARSE_DO_ROUTINE(csrilu0) \ ROCSPARSE_DO_ROUTINE(csritilu0) \ ROCSPARSE_DO_ROUTINE(csrgeam) \ ROCSPARSE_DO_ROUTINE(csrgemm) \ ROCSPARSE_DO_ROUTINE(csrgemm_reuse) \ ROCSPARSE_DO_ROUTINE(csrmv) \ ROCSPARSE_DO_ROUTINE(csrmv_managed) \ ROCSPARSE_DO_ROUTINE(cscmv) \ ROCSPARSE_DO_ROUTINE(csrmm) \ ROCSPARSE_DO_ROUTINE(csrmm_batched) \ ROCSPARSE_DO_ROUTINE(cscmm) \ ROCSPARSE_DO_ROUTINE(cscmm_batched) \ ROCSPARSE_DO_ROUTINE(csrsm) \ ROCSPARSE_DO_ROUTINE(csrsort) \ ROCSPARSE_DO_ROUTINE(csrsv) \ ROCSPARSE_DO_ROUTINE(csritsv) \ ROCSPARSE_DO_ROUTINE(spitsv_csr) \ ROCSPARSE_DO_ROUTINE(csr2dense) \ ROCSPARSE_DO_ROUTINE(csr2bsr) \ ROCSPARSE_DO_ROUTINE(csr2coo) \ ROCSPARSE_DO_ROUTINE(csr2csc) \ 
ROCSPARSE_DO_ROUTINE(csr2csr_compress) \ ROCSPARSE_DO_ROUTINE(csr2ell) \ ROCSPARSE_DO_ROUTINE(csr2gebsr) \ ROCSPARSE_DO_ROUTINE(csr2hyb) \ ROCSPARSE_DO_ROUTINE(dense2coo) \ ROCSPARSE_DO_ROUTINE(dense2csc) \ ROCSPARSE_DO_ROUTINE(dense2csr) \ ROCSPARSE_DO_ROUTINE(dense_to_sparse_coo) \ ROCSPARSE_DO_ROUTINE(dense_to_sparse_csc) \ ROCSPARSE_DO_ROUTINE(dense_to_sparse_csr) \ ROCSPARSE_DO_ROUTINE(doti) \ ROCSPARSE_DO_ROUTINE(dotci) \ ROCSPARSE_DO_ROUTINE(ellmv) \ ROCSPARSE_DO_ROUTINE(ell2csr) \ ROCSPARSE_DO_ROUTINE(gebsr2csr) \ ROCSPARSE_DO_ROUTINE(gebsr2gebsr) \ ROCSPARSE_DO_ROUTINE(gthr) \ ROCSPARSE_DO_ROUTINE(gthrz) \ ROCSPARSE_DO_ROUTINE(gebsr2gebsc) \ ROCSPARSE_DO_ROUTINE(gebsrmv) \ ROCSPARSE_DO_ROUTINE(gebsrmm) \ ROCSPARSE_DO_ROUTINE(gemmi) \ ROCSPARSE_DO_ROUTINE(gemvi) \ ROCSPARSE_DO_ROUTINE(gpsv_interleaved_batch) \ ROCSPARSE_DO_ROUTINE(gtsv) \ ROCSPARSE_DO_ROUTINE(gtsv_no_pivot) \ ROCSPARSE_DO_ROUTINE(gtsv_no_pivot_strided_batch) \ ROCSPARSE_DO_ROUTINE(gtsv_interleaved_batch) \ ROCSPARSE_DO_ROUTINE(hybmv) \ ROCSPARSE_DO_ROUTINE(hyb2csr) \ ROCSPARSE_DO_ROUTINE(identity) \ ROCSPARSE_DO_ROUTINE(nnz) \ ROCSPARSE_DO_ROUTINE(prune_csr2csr) \ ROCSPARSE_DO_ROUTINE(prune_csr2csr_by_percentage) \ ROCSPARSE_DO_ROUTINE(prune_dense2csr) \ ROCSPARSE_DO_ROUTINE(prune_dense2csr_by_percentage) \ ROCSPARSE_DO_ROUTINE(roti) \ ROCSPARSE_DO_ROUTINE(sctr) \ ROCSPARSE_DO_ROUTINE(sddmm) \ ROCSPARSE_DO_ROUTINE(sparse_to_dense_coo) \ ROCSPARSE_DO_ROUTINE(sparse_to_dense_csc) \ ROCSPARSE_DO_ROUTINE(sparse_to_dense_csr) // clang-format on template static constexpr std::size_t countof(T (&)[N]) { return N; } struct rocsparse_routine { private: public: #define ROCSPARSE_DO_ROUTINE(x_) x_, typedef enum _ : rocsparse_int { ROCSPARSE_FOREACH_ROUTINE } value_type; value_type value{}; static constexpr value_type all_routines[] = {ROCSPARSE_FOREACH_ROUTINE}; #undef ROCSPARSE_DO_ROUTINE static constexpr std::size_t num_routines = countof(all_routines); private: #define ROCSPARSE_DO_ROUTINE(x_) #x_, 
static constexpr const char* s_routine_names[num_routines]{ROCSPARSE_FOREACH_ROUTINE}; #undef ROCSPARSE_DO_ROUTINE public: rocsparse_routine(); rocsparse_routine& operator()(const char* function); explicit rocsparse_routine(const char* function); rocsparse_status dispatch(const char precision, const char indextype, const Arguments& arg) const; constexpr const char* to_string() const; private: template static rocsparse_status dispatch_call(const Arguments& arg); template static rocsparse_status dispatch_indextype(const char cindextype, const Arguments& arg); template static rocsparse_status dispatch_precision(const char precision, const char indextype, const Arguments& arg); }; rocSPARSE-rocm-5.7.1/clients/common/000077500000000000000000000000001447342677400171365ustar00rootroot00000000000000rocSPARSE-rocm-5.7.1/clients/common/rocsparse_check.cpp000066400000000000000000000412541447342677400230060ustar00rootroot00000000000000/*! \file */ /* ************************************************************************ * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * * ************************************************************************ */ #include "rocsparse_check.hpp" #ifdef GOOGLE_TEST #include #endif #ifndef GOOGLE_TEST #include #define ASSERT_TRUE(cond) \ do \ { \ if(!(cond)) \ { \ std::cerr << "ASSERT_TRUE() failed." << std::endl; \ exit(EXIT_FAILURE); \ } \ } while(0) #define ASSERT_EQ(state1, state2) \ do \ { \ if(state1 != state2) \ { \ std::cerr.precision(16); \ std::cerr << "ASSERT_EQ(" << state1 << ", " << state2 << ") failed." << std::endl; \ exit(EXIT_FAILURE); \ } \ } while(0) #define ASSERT_FLOAT_EQ ASSERT_EQ #define ASSERT_DOUBLE_EQ ASSERT_EQ #endif #define ASSERT_FLOAT_COMPLEX_EQ(a, b) \ do \ { \ ASSERT_FLOAT_EQ(std::real(a), std::real(b)); \ ASSERT_FLOAT_EQ(std::imag(a), std::imag(b)); \ } while(0) #define ASSERT_DOUBLE_COMPLEX_EQ(a, b) \ do \ { \ ASSERT_DOUBLE_EQ(std::real(a), std::real(b)); \ ASSERT_DOUBLE_EQ(std::imag(a), std::imag(b)); \ } while(0) #define ROCSPARSE_UNIT_CHECK(M, N, A, LDA, B, LDB, UNIT_ASSERT_EQ) \ do \ { \ for(int64_t j = 0; j < N; ++j) \ for(int64_t i = 0; i < M; ++i) \ if(rocsparse_isnan(A[i + j * LDA])) \ { \ ASSERT_TRUE(rocsparse_isnan(B[i + j * LDB])); \ } \ else \ { \ UNIT_ASSERT_EQ(A[i + j * LDA], B[i + j * LDB]); \ } \ } while(0) template <> void unit_check_general( int64_t M, int64_t N, const float* A, int64_t LDA, const float* B, int64_t LDB) { ROCSPARSE_UNIT_CHECK(M, N, A, LDA, B, LDB, ASSERT_FLOAT_EQ); } template <> void unit_check_general( int64_t M, int64_t N, const double* A, int64_t LDA, const double* B, int64_t LDB) { ROCSPARSE_UNIT_CHECK(M, N, A, LDA, B, LDB, ASSERT_DOUBLE_EQ); } template <> void unit_check_general(int64_t M, int64_t N, const rocsparse_float_complex* A, int64_t LDA, const 
rocsparse_float_complex* B, int64_t LDB) { ROCSPARSE_UNIT_CHECK(M, N, A, LDA, B, LDB, ASSERT_FLOAT_COMPLEX_EQ); } template <> void unit_check_general(int64_t M, int64_t N, const rocsparse_double_complex* A, int64_t LDA, const rocsparse_double_complex* B, int64_t LDB) { ROCSPARSE_UNIT_CHECK(M, N, A, LDA, B, LDB, ASSERT_DOUBLE_COMPLEX_EQ); } template <> void unit_check_general( int64_t M, int64_t N, const int32_t* A, int64_t LDA, const int32_t* B, int64_t LDB) { ROCSPARSE_UNIT_CHECK(M, N, A, LDA, B, LDB, ASSERT_EQ); } template <> void unit_check_general( int64_t M, int64_t N, const int64_t* A, int64_t LDA, const int64_t* B, int64_t LDB) { ROCSPARSE_UNIT_CHECK(M, N, A, LDA, B, LDB, ASSERT_EQ); } template <> void unit_check_general( int64_t M, int64_t N, const size_t* A, int64_t LDA, const size_t* B, int64_t LDB) { ROCSPARSE_UNIT_CHECK(M, N, A, LDA, B, LDB, ASSERT_EQ); } template <> void unit_check_enum(const rocsparse_index_base a, const rocsparse_index_base b) { ASSERT_TRUE(a == b); } template <> void unit_check_enum(const rocsparse_order a, const rocsparse_order b) { ASSERT_TRUE(a == b); } template <> void unit_check_enum(const rocsparse_direction a, const rocsparse_direction b) { ASSERT_TRUE(a == b); } #define MAX_TOL_MULTIPLIER 4 template void near_check_general_template(int64_t M, int64_t N, const T* A, int64_t LDA, const T* B, int64_t LDB, floating_data_t tol = default_tolerance::value) { int tolm = 1; for(int64_t j = 0; j < N; ++j) { for(int64_t i = 0; i < M; ++i) { T compare_val = std::max(std::abs(A[i + j * LDA] * tol), 10 * std::numeric_limits::epsilon()); #ifdef GOOGLE_TEST if(rocsparse_isnan(A[i + j * LDA])) { ASSERT_TRUE(rocsparse_isnan(B[i + j * LDB])); } else if(rocsparse_isinf(A[i + j * LDA])) { ASSERT_TRUE(rocsparse_isinf(B[i + j * LDB])); } else { int k; for(k = 1; k <= MAX_TOL_MULTIPLIER; ++k) { if(std::abs(A[i + j * LDA] - B[i + j * LDB]) <= compare_val * k) { break; } } if(k > MAX_TOL_MULTIPLIER) { ASSERT_NEAR(A[i + j * LDA], B[i + j * LDB], 
compare_val); } tolm = std::max(tolm, k); } #else int k; for(k = 1; k <= MAX_TOL_MULTIPLIER; ++k) { if(std::abs(A[i + j * LDA] - B[i + j * LDB]) <= compare_val * k) { break; } } if(k > MAX_TOL_MULTIPLIER) { std::cerr.precision(12); std::cerr << "ASSERT_NEAR(" << A[i + j * LDA] << ", " << B[i + j * LDB] << ") failed: " << std::abs(A[i + j * LDA] - B[i + j * LDB]) << " exceeds permissive range [" << compare_val << "," << compare_val * MAX_TOL_MULTIPLIER << " ]" << std::endl; exit(EXIT_FAILURE); } tolm = std::max(tolm, k); #endif } } if(tolm > 1) { std::cerr << "WARNING near_check has been permissive with a tolerance multiplier equal to " << tolm << std::endl; } } template <> void near_check_general_template(int64_t M, int64_t N, const rocsparse_float_complex* A, int64_t LDA, const rocsparse_float_complex* B, int64_t LDB, float tol) { int tolm = 1; for(int64_t j = 0; j < N; ++j) { for(int64_t i = 0; i < M; ++i) { rocsparse_float_complex compare_val = rocsparse_float_complex(std::max(std::abs(std::real(A[i + j * LDA]) * tol), 10 * std::numeric_limits::epsilon()), std::max(std::abs(std::imag(A[i + j * LDA]) * tol), 10 * std::numeric_limits::epsilon())); #ifdef GOOGLE_TEST if(rocsparse_isnan(A[i + j * LDA])) { ASSERT_TRUE(rocsparse_isnan(B[i + j * LDB])); } else if(rocsparse_isinf(A[i + j * LDA])) { ASSERT_TRUE(rocsparse_isinf(B[i + j * LDB])); } else { int k; for(k = 1; k <= MAX_TOL_MULTIPLIER; ++k) { if(std::abs(std::real(A[i + j * LDA]) - std::real(B[i + j * LDB])) <= std::real(compare_val) * k && std::abs(std::imag(A[i + j * LDA]) - std::imag(B[i + j * LDB])) <= std::imag(compare_val) * k) { break; } } if(k > MAX_TOL_MULTIPLIER) { ASSERT_NEAR(std::real(A[i + j * LDA]), std::real(B[i + j * LDB]), std::real(compare_val)); ASSERT_NEAR(std::imag(A[i + j * LDA]), std::imag(B[i + j * LDB]), std::imag(compare_val)); } tolm = std::max(tolm, k); } #else int k; for(k = 1; k <= MAX_TOL_MULTIPLIER; ++k) { if(std::abs(std::real(A[i + j * LDA]) - std::real(B[i + j * LDB])) <= 
std::real(compare_val) * k && std::abs(std::imag(A[i + j * LDA]) - std::imag(B[i + j * LDB])) <= std::imag(compare_val) * k) { break; } } if(k > MAX_TOL_MULTIPLIER) { std::cerr.precision(16); std::cerr << "ASSERT_NEAR(" << A[i + j * LDA] << ", " << B[i + j * LDB] << ") failed: " << std::abs(A[i + j * LDA] - B[i + j * LDB]) << " exceeds permissive range [" << compare_val << "," << compare_val * MAX_TOL_MULTIPLIER << " ]" << std::endl; exit(EXIT_FAILURE); } tolm = std::max(tolm, k); #endif } } if(tolm > 1) { std::cerr << "WARNING near_check has been permissive with a tolerance multiplier equal to " << tolm << std::endl; } } template <> void near_check_general_template(int64_t M, int64_t N, const rocsparse_double_complex* A, int64_t LDA, const rocsparse_double_complex* B, int64_t LDB, double tol) { int tolm = 1; for(int64_t j = 0; j < N; ++j) { for(int64_t i = 0; i < M; ++i) { rocsparse_double_complex compare_val = rocsparse_double_complex(std::max(std::abs(std::real(A[i + j * LDA]) * tol), 10 * std::numeric_limits::epsilon()), std::max(std::abs(std::imag(A[i + j * LDA]) * tol), 10 * std::numeric_limits::epsilon())); #ifdef GOOGLE_TEST if(rocsparse_isnan(A[i + j * LDA])) { ASSERT_TRUE(rocsparse_isnan(B[i + j * LDB])); } else if(rocsparse_isinf(A[i + j * LDA])) { ASSERT_TRUE(rocsparse_isinf(B[i + j * LDB])); } else { int k; for(k = 1; k <= MAX_TOL_MULTIPLIER; ++k) { if(std::abs(std::real(A[i + j * LDA]) - std::real(B[i + j * LDB])) <= std::real(compare_val) * k && std::abs(std::imag(A[i + j * LDA]) - std::imag(B[i + j * LDB])) <= std::imag(compare_val) * k) { break; } } if(k > MAX_TOL_MULTIPLIER) { ASSERT_NEAR(std::real(A[i + j * LDA]), std::real(B[i + j * LDB]), std::real(compare_val)); ASSERT_NEAR(std::imag(A[i + j * LDA]), std::imag(B[i + j * LDB]), std::imag(compare_val)); } tolm = std::max(tolm, k); } #else int k; for(k = 1; k <= MAX_TOL_MULTIPLIER; ++k) { if(std::abs(std::real(A[i + j * LDA]) - std::real(B[i + j * LDB])) <= std::real(compare_val) * k && 
std::abs(std::imag(A[i + j * LDA]) - std::imag(B[i + j * LDB])) <= std::imag(compare_val) * k) { break; } } if(k > MAX_TOL_MULTIPLIER) { std::cerr.precision(16); std::cerr << "ASSERT_NEAR(" << A[i + j * LDA] << ", " << B[i + j * LDB] << ") failed: " << std::abs(A[i + j * LDA] - B[i + j * LDB]) << " exceeds permissive range [" << compare_val << "," << compare_val * MAX_TOL_MULTIPLIER << " ]" << std::endl; exit(EXIT_FAILURE); } tolm = std::max(tolm, k); #endif } } if(tolm > 1) { std::cerr << "WARNING near_check has been permissive with a tolerance multiplier equal to " << tolm << std::endl; } } template void near_check_general( int64_t M, int64_t N, const T* A, int64_t LDA, const T* B, int64_t LDB, floating_data_t tol) { near_check_general_template(M, N, A, LDA, B, LDB, tol); } #define INSTANTIATE(TYPE) \ template void near_check_general(int64_t M, \ int64_t N, \ const TYPE* A, \ int64_t LDA, \ const TYPE* B, \ int64_t LDB, \ floating_data_t tol) INSTANTIATE(int32_t); INSTANTIATE(float); INSTANTIATE(double); INSTANTIATE(rocsparse_float_complex); INSTANTIATE(rocsparse_double_complex); #undef INSTANTIATE rocSPARSE-rocm-5.7.1/clients/common/rocsparse_clients_envariables.cpp000066400000000000000000000222101447342677400257340ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2023 Advanced Micro Devices, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * * ************************************************************************ */ #include "rocsparse_clients_envariables.hpp" #include "rocsparse-types.h" #include constexpr rocsparse_clients_envariables::var_bool rocsparse_clients_envariables::s_var_bool_all[]; constexpr rocsparse_clients_envariables::var_string rocsparse_clients_envariables::s_var_string_all[]; template static inline constexpr std::size_t countof(T (&)[N]) { return N; } static constexpr size_t s_var_bool_size = countof(rocsparse_clients_envariables::s_var_bool_all); static constexpr size_t s_var_string_size = countof(rocsparse_clients_envariables::s_var_string_all); static constexpr const char* s_var_bool_names[s_var_bool_size] = {"ROCSPARSE_CLIENTS_VERBOSE"}; static constexpr const char* s_var_string_names[s_var_string_size] = {"ROCSPARSE_CLIENTS_MATRICES_DIR"}; static constexpr const char* s_var_bool_descriptions[s_var_bool_size] = {"0: disabled, 1: enabled"}; static constexpr const char* s_var_string_descriptions[s_var_string_size] = {"Full path of the matrices directory"}; /// /// @brief Grab an environment variable value. /// @return true if the operation is successful, false otherwise. 
/// template static bool rocsparse_getenv(const char* name, bool& defined, T& val); template <> bool rocsparse_getenv(const char* name, bool& defined, bool& val) { val = false; const char* getenv_str = getenv(name); defined = (getenv_str != nullptr); if(defined) { auto getenv_int = atoi(getenv_str); if((getenv_int != 0) && (getenv_int != 1)) { std::cerr << "rocsparse error, invalid environment variable " << name << " must be 0 or 1." << std::endl; val = false; return false; } else { val = (getenv_int == 1); return true; } } else { return true; } } template <> bool rocsparse_getenv(const char* name, bool& defined, std::string& val) { const char* getenv_str = getenv(name); defined = (getenv_str != nullptr); if(defined) { val = getenv_str; } return true; } struct rocsparse_clients_envariables_impl { public: // // \brief Return value of a Boolean variable. // inline bool get(rocsparse_clients_envariables::var_bool v) const { return this->m_var_bool[v]; }; // // \brief Return value of a string variable. // inline const char* get(rocsparse_clients_envariables::var_string v) const { return this->m_var_string[v].c_str(); }; // // \brief Is a Boolean variable defined ? // inline bool is_defined(rocsparse_clients_envariables::var_bool v) const { return this->m_var_bool_defined[v]; }; // // \brief Is a string variable defined ? // inline bool is_defined(rocsparse_clients_envariables::var_string v) const { return this->m_var_string_defined[v]; }; // // Return the unique instance. 
// static rocsparse_clients_envariables_impl& Instance(); private: ~rocsparse_clients_envariables_impl() = default; rocsparse_clients_envariables_impl(const rocsparse_clients_envariables_impl&) = delete; rocsparse_clients_envariables_impl& operator=(const rocsparse_clients_envariables_impl&) = delete; bool m_var_bool[s_var_bool_size]{}; bool m_var_bool_defined[s_var_bool_size]{}; std::string m_var_string[s_var_string_size]{}; bool m_var_string_defined[s_var_string_size]{}; rocsparse_clients_envariables_impl() { for(auto tag : rocsparse_clients_envariables::s_var_bool_all) { switch(tag) { case rocsparse_clients_envariables::VERBOSE: { const bool success = rocsparse_getenv( s_var_bool_names[tag], this->m_var_bool_defined[tag], this->m_var_bool[tag]); if(!success) { std::cerr << "rocsparse_getenv failed on fetching " << s_var_bool_names[tag] << std::endl; throw(rocsparse_status_invalid_value); } break; } } } for(auto tag : rocsparse_clients_envariables::s_var_string_all) { switch(tag) { case rocsparse_clients_envariables::MATRICES_DIR: { const bool success = rocsparse_getenv(s_var_string_names[tag], this->m_var_string_defined[tag], this->m_var_string[tag]); if(!success) { std::cerr << "rocsparse_getenv failed on fetching " << s_var_string_names[tag] << std::endl; throw(rocsparse_status_invalid_value); } break; } } } if(this->m_var_bool[rocsparse_clients_envariables::VERBOSE]) { for(auto tag : rocsparse_clients_envariables::s_var_bool_all) { switch(tag) { case rocsparse_clients_envariables::VERBOSE: { const bool v = this->m_var_bool[tag]; std::cout << "" << "env variable " << s_var_bool_names[tag] << " : " << ((this->m_var_bool_defined[tag]) ? ((v) ? 
"enabled" : "disabled") : "") << std::endl; break; } } } for(auto tag : rocsparse_clients_envariables::s_var_string_all) { switch(tag) { case rocsparse_clients_envariables::MATRICES_DIR: { const std::string v = this->m_var_string[tag]; std::cout << "" << "env variable " << s_var_string_names[tag] << " : " << ((this->m_var_string_defined[tag]) ? this->m_var_string[tag] : "") << std::endl; break; } } } } } }; rocsparse_clients_envariables_impl& rocsparse_clients_envariables_impl::Instance() { static rocsparse_clients_envariables_impl instance; return instance; } bool rocsparse_clients_envariables::is_defined(rocsparse_clients_envariables::var_string v) { return rocsparse_clients_envariables_impl::Instance().is_defined(v); } const char* rocsparse_clients_envariables::get(rocsparse_clients_envariables::var_string v) { return rocsparse_clients_envariables_impl::Instance().get(v); } const char* rocsparse_clients_envariables::get_name(rocsparse_clients_envariables::var_string v) { return s_var_string_names[v]; } const char* rocsparse_clients_envariables::get_description(rocsparse_clients_envariables::var_string v) { return s_var_string_descriptions[v]; } bool rocsparse_clients_envariables::is_defined(rocsparse_clients_envariables::var_bool v) { return rocsparse_clients_envariables_impl::Instance().is_defined(v); } bool rocsparse_clients_envariables::get(rocsparse_clients_envariables::var_bool v) { return rocsparse_clients_envariables_impl::Instance().get(v); } const char* rocsparse_clients_envariables::get_name(rocsparse_clients_envariables::var_bool v) { return s_var_bool_names[v]; } const char* rocsparse_clients_envariables::get_description(rocsparse_clients_envariables::var_bool v) { return s_var_bool_descriptions[v]; } rocSPARSE-rocm-5.7.1/clients/common/rocsparse_enum.cpp000066400000000000000000000037141447342677400226740ustar00rootroot00000000000000/*! 
\file */ /* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
* * ************************************************************************ */ #include "rocsparse.hpp" #include "rocsparse_enum.hpp" constexpr rocsparse_matrix_type_t::value_t rocsparse_matrix_type_t::values[rocsparse_matrix_type_t::nvalues]; constexpr rocsparse_operation_t::value_t rocsparse_operation_t::values[rocsparse_operation_t::nvalues]; constexpr rocsparse_storage_mode_t::value_t rocsparse_storage_mode_t::values[rocsparse_storage_mode_t::nvalues]; std::ostream& operator<<(std::ostream& out, const rocsparse_operation& v) { out << rocsparse_operation2string(v); return out; } std::ostream& operator<<(std::ostream& out, const rocsparse_direction& v) { out << rocsparse_direction2string(v); return out; } rocSPARSE-rocm-5.7.1/clients/common/rocsparse_exporter_ascii.cpp000066400000000000000000000302121447342677400247410ustar00rootroot00000000000000/*! \file */ /* ************************************************************************ * Copyright (C) 2022 Advanced Micro Devices, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * * ************************************************************************ */ #include "rocsparse_exporter_ascii.hpp" template rocsparse_status rocsparse_type_conversion(const X& x, Y& y); rocsparse_exporter_ascii::~rocsparse_exporter_ascii() { const char* env = getenv("GTEST_LISTENER"); if(!env || strcmp(env, "NO_PASS_LINE_IN_LOG")) { std::cout << "Export done." << std::endl; } } rocsparse_exporter_ascii::rocsparse_exporter_ascii(const std::string& filename_) : m_filename(filename_) { const char* env = getenv("GTEST_LISTENER"); if(!env || strcmp(env, "NO_PASS_LINE_IN_LOG")) { std::cout << "Opening file '" << this->m_filename << "' ... " << std::endl; } } template void convert_array(int nnz, const void* data, void* mem); template rocsparse_status rocsparse_exporter_ascii::write_sparse_csx(rocsparse_direction dir_, J m_, J n_, I nnz_, const I* __restrict__ ptr_, const J* __restrict__ ind_, const T* __restrict__ val_, rocsparse_index_base base_) { std::ofstream out(this->m_filename); if(std::is_same() || std::is_same()) { out.precision(15); out.setf(std::ios::scientific); } else { out.precision(7); out.setf(std::ios::scientific); } if(dir_ == rocsparse_direction_row) { out << "matrix: sparse_csr" << std::endl; } else { out << "matrix: sparse_csc" << std::endl; } out << "dir: " << dir_ << std::endl; out << "m: " << m_ << std::endl; out << "n: " << n_ << std::endl; out << "nnz: " << nnz_ << std::endl; out << "base: " << base_ << std::endl; const char* dir = (dir_ == rocsparse_direction_row) ? "row" : "col"; const char* odir = (dir_ == rocsparse_direction_row) ? "col" : "row"; J L = (dir_ == rocsparse_direction_row) ? 
m_ : n_; for(J i = 0; i < L; ++i) { out << dir << ": " << i << std::endl; for(int k = ptr_[i]; k < ptr_[i + 1]; ++k) { out << " " << odir << " = " << (ind_[k - base_] - base_) << ", val = " << val_[k] << std::endl; } } out.close(); return rocsparse_status_success; } template rocsparse_status rocsparse_exporter_ascii::write_sparse_gebsx(rocsparse_direction dir_, rocsparse_direction dirb_, J mb_, J nb_, I nnzb_, J bm_, J bn_, const I* __restrict__ ptr_, const J* __restrict__ ind_, const T* __restrict__ val_, rocsparse_index_base base_) { std::ofstream out(this->m_filename); if(std::is_same() || std::is_same()) { out.precision(15); out.setf(std::ios::scientific); } else { out.precision(7); out.setf(std::ios::scientific); } if(dir_ == rocsparse_direction_row) { out << "matrix: sparse_gebsr" << std::endl; } else { out << "matrix: sparse_gebsc" << std::endl; } out << "dir: " << dir_ << std::endl; out << "dirb: " << dirb_ << std::endl; out << "mb: " << mb_ << std::endl; out << "nb: " << nb_ << std::endl; out << "nnzb: " << nnzb_ << std::endl; out << "bm: " << bm_ << std::endl; out << "bn: " << bn_ << std::endl; out << "base: " << base_ << std::endl; out.close(); return rocsparse_status_success; } template rocsparse_status rocsparse_exporter_ascii::write_dense_vector(I nmemb_, const T* __restrict__ x_, I incx_) { std::ofstream out(this->m_filename); if(std::is_same() || std::is_same()) { out.precision(15); out.setf(std::ios::scientific); } else { out.precision(7); out.setf(std::ios::scientific); } out << "matrix: dense_vector" << std::endl; out << "m: " << nmemb_ << std::endl; out << "data: " << std::endl; for(I i = 0; i < nmemb_; ++i) { out << x_[incx_ * i] << std::endl; } out.close(); return rocsparse_status_success; } template rocsparse_status rocsparse_exporter_ascii::write_dense_matrix( rocsparse_order order_, I m_, I n_, const T* __restrict__ x_, I ld_) { std::ofstream out(this->m_filename); if(std::is_same() || std::is_same()) { out.precision(15); 
out.setf(std::ios::scientific); } else { out.precision(7); out.setf(std::ios::scientific); } out << "matrix: dense_matrix" << std::endl; out << "order: " << order_ << std::endl; out << "m: " << m_ << std::endl; out << "n: " << n_ << std::endl; out << "data: " << std::endl; for(I i = 0; i < m_; ++i) { for(I j = 0; j < n_; ++j) { if(order_ == rocsparse_order_row) { out << " " << x_[ld_ * j + i]; } else { out << " " << x_[ld_ * i + j]; } } out << std::endl; } out.close(); return rocsparse_status_success; } template rocsparse_status rocsparse_exporter_ascii::write_sparse_coo(I m_, I n_, I nnz_, const I* __restrict__ row_ind_, const I* __restrict__ col_ind_, const T* __restrict__ val_, rocsparse_index_base base_) { return rocsparse_status_not_implemented; } #define INSTANTIATE_TIJ(T, I, J) \ template rocsparse_status rocsparse_exporter_ascii::write_sparse_csx(rocsparse_direction, \ J, \ J, \ I, \ const I* __restrict__, \ const J* __restrict__, \ const T* __restrict__, \ rocsparse_index_base); \ template rocsparse_status rocsparse_exporter_ascii::write_sparse_gebsx(rocsparse_direction, \ rocsparse_direction, \ J, \ J, \ I, \ J, \ J, \ const I* __restrict__, \ const J* __restrict__, \ const T* __restrict__, \ rocsparse_index_base) #define INSTANTIATE_TI(T, I) \ template rocsparse_status rocsparse_exporter_ascii::write_dense_vector( \ I, const T* __restrict__, I); \ template rocsparse_status rocsparse_exporter_ascii::write_dense_matrix( \ rocsparse_order, I, I, const T* __restrict__, I); \ template rocsparse_status rocsparse_exporter_ascii::write_sparse_coo(I, \ I, \ I, \ const I* __restrict__, \ const I* __restrict__, \ const T* __restrict__, \ rocsparse_index_base) INSTANTIATE_TIJ(float, int32_t, int32_t); INSTANTIATE_TIJ(float, int64_t, int32_t); INSTANTIATE_TIJ(float, int64_t, int64_t); INSTANTIATE_TIJ(double, int32_t, int32_t); INSTANTIATE_TIJ(double, int64_t, int32_t); INSTANTIATE_TIJ(double, int64_t, int64_t); INSTANTIATE_TIJ(rocsparse_float_complex, int32_t, 
int32_t); INSTANTIATE_TIJ(rocsparse_float_complex, int64_t, int32_t); INSTANTIATE_TIJ(rocsparse_float_complex, int64_t, int64_t); INSTANTIATE_TIJ(rocsparse_double_complex, int32_t, int32_t); INSTANTIATE_TIJ(rocsparse_double_complex, int64_t, int32_t); INSTANTIATE_TIJ(rocsparse_double_complex, int64_t, int64_t); INSTANTIATE_TI(float, int32_t); INSTANTIATE_TI(float, int64_t); INSTANTIATE_TI(double, int32_t); INSTANTIATE_TI(double, int64_t); INSTANTIATE_TI(rocsparse_float_complex, int32_t); INSTANTIATE_TI(rocsparse_float_complex, int64_t); INSTANTIATE_TI(rocsparse_double_complex, int32_t); INSTANTIATE_TI(rocsparse_double_complex, int64_t); rocSPARSE-rocm-5.7.1/clients/common/rocsparse_exporter_ascii.hpp000066400000000000000000000074431447342677400247600ustar00rootroot00000000000000/*! \file */ /* ************************************************************************ * Copyright (C) 2022 Advanced Micro Devices, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 * ************************************************************************ */

#pragma once
#ifndef ROCSPARSE_EXPORTER_ASCII_HPP
#define ROCSPARSE_EXPORTER_ASCII_HPP

#include "rocsparse_exporter.hpp"

//
// Exporter writing matrices and vectors to a text file.
//
// Every write_* method receives the data to export and returns a
// rocsparse_status; the destination is the file name given at construction.
//
// NOTE(review): in this copy of the file the template parameter lists after
// each `template` keyword (and possibly a template argument of the base class)
// are missing. Restore them from upstream before compiling.
//
class rocsparse_exporter_ascii : public rocsparse_exporter
{
protected:
    // Name of the output file.
    std::string m_filename{};

public:
    ~rocsparse_exporter_ascii();

    // Build an exporter writing to the file filename_.
    rocsparse_exporter_ascii(const std::string& filename_);

    // Export a sparse matrix given by offsets (ptr), indices (ind) and values (val);
    // dir selects between row and column compressed storage.
    template
    rocsparse_status write_sparse_csx(rocsparse_direction dir,
                                      J                   m,
                                      J                   n,
                                      I                   nnz,
                                      const I* __restrict__ ptr,
                                      const J* __restrict__ ind,
                                      const T* __restrict__ val,
                                      rocsparse_index_base base);

    // Export a sparse matrix stored by blocks of block_dim_row x block_dim_column
    // values; dirb is the direction inside a block.
    template
    rocsparse_status write_sparse_gebsx(rocsparse_direction dir,
                                        rocsparse_direction dirb,
                                        J                   mb,
                                        J                   nb,
                                        I                   nnzb,
                                        J                   block_dim_row,
                                        J                   block_dim_column,
                                        const I* __restrict__ ptr,
                                        const J* __restrict__ ind,
                                        const T* __restrict__ val,
                                        rocsparse_index_base base);

    // Export a sparse matrix given by coordinates (row_ind, col_ind) and values (val).
    template
    rocsparse_status write_sparse_coo(I m,
                                      I n,
                                      I nnz,
                                      const I* __restrict__ row_ind,
                                      const I* __restrict__ col_ind,
                                      const T* __restrict__ val,
                                      rocsparse_index_base base);

    // Export a dense vector of size values read from x with stride incx.
    template
    rocsparse_status write_dense_vector(I size, const T* __restrict__ x, I incx);

    // Export an m x n dense matrix stored in x with leading dimension ld.
    template
    rocsparse_status write_dense_matrix(rocsparse_order order,
                                        I               m,
                                        I               n,
                                        const T* __restrict__ x,
                                        I ld);
};

#endif // HEADER
rocSPARSE-rocm-5.7.1/clients/common/rocsparse_exporter_matrixmarket.cpp000066400000000000000000000507221447342677400263710ustar00rootroot00000000000000/*! \file */
/* ************************************************************************
 * Copyright (C) 2022 Advanced Micro Devices, Inc. All rights Reserved.
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * * ************************************************************************ */ #include "rocsparse_exporter_matrixmarket.hpp" template rocsparse_status rocsparse_type_conversion(const X& x, Y& y); rocsparse_exporter_matrixmarket::~rocsparse_exporter_matrixmarket() { const char* env = getenv("GTEST_LISTENER"); if(!env || strcmp(env, "NO_PASS_LINE_IN_LOG")) { std::cout << "Export done." << std::endl; } } rocsparse_exporter_matrixmarket::rocsparse_exporter_matrixmarket(const std::string& filename_) : m_filename(filename_) { const char* env = getenv("GTEST_LISTENER"); if(!env || strcmp(env, "NO_PASS_LINE_IN_LOG")) { std::cout << "Opening file '" << this->m_filename << "' ... 
" << std::endl; } } template rocsparse_status rocsparse_exporter_matrixmarket::write_sparse_csx(rocsparse_direction dir_, J m_, J n_, I nnz_, const I* __restrict__ ptr_, const J* __restrict__ ind_, const T* __restrict__ val_, rocsparse_index_base base_) { std::ofstream out(this->m_filename); if(!out.is_open()) { return rocsparse_status_internal_error; } if(std::is_same() || std::is_same()) { out.precision(15); out.setf(std::ios::scientific); } else { out.precision(7); out.setf(std::ios::scientific); } out << "%%MatrixMarket matrix coordinate "; if(std::is_same() || std::is_same()) out << "complex"; else out << "real"; out << " general" << std::endl; out << m_ << " " << n_ << " " << nnz_ << std::endl; switch(dir_) { case rocsparse_direction_row: { for(J i = 0; i < m_; ++i) { for(I at = ptr_[i] - base_; at < ptr_[i + 1] - base_; ++at) { J j = ind_[at] - base_; T x = val_[at]; out << (i + 1) << " " << (j + 1); if(std::is_same() || std::is_same()) { out << " " << std::real(x) << " " << std::imag(x); } else { out << " " << x; } out << std::endl; } } out.close(); return rocsparse_status_success; } case rocsparse_direction_column: { for(J j = 0; j < n_; ++j) { for(I at = ptr_[j] - base_; at < ptr_[j + 1] - base_; ++at) { J i = ind_[at] - base_; T x = val_[at]; out << (i + 1) << " " << (j + 1); if(std::is_same() || std::is_same()) { out << " " << std::real(x) << " " << std::imag(x); } else { out << " " << x; } out << std::endl; } } out.close(); return rocsparse_status_success; } } return rocsparse_status_invalid_value; } template rocsparse_status rocsparse_exporter_matrixmarket::write_sparse_gebsx(rocsparse_direction dir_, rocsparse_direction dirb_, J mb_, J nb_, I nnzb_, J block_dim_row_, J block_dim_column_, const I* __restrict__ ptr_, const J* __restrict__ ind_, const T* __restrict__ val_, rocsparse_index_base base_) { std::ofstream out(this->m_filename); if(!out.is_open()) { return rocsparse_status_internal_error; } if(std::is_same() || std::is_same()) { 
out.precision(15); out.setf(std::ios::scientific); } else { out.precision(7); out.setf(std::ios::scientific); } out << "%%MatrixMarket matrix coordinate "; if(std::is_same() || std::is_same()) out << "complex"; else out << "real"; out << " general" << std::endl; out << mb_ * block_dim_row_ << " " << nb_ * block_dim_column_ << " " << nnzb_ * block_dim_row_ * block_dim_column_ << std::endl; switch(dir_) { case rocsparse_direction_row: { for(J ib = 0; ib < mb_; ++ib) { I i = ib * block_dim_row_; for(I at = ptr_[ib] - base_; at < ptr_[ib + 1] - base_; ++at) { J j = (ind_[at] - base_) * block_dim_column_; switch(dirb_) { case rocsparse_direction_row: { for(J k = 0; k < block_dim_row_; ++k) { for(J l = 0; l < block_dim_column_; ++l) { auto v = val_[at * block_dim_row_ * block_dim_column_ + block_dim_column_ * k + l]; out << (i + k) << " " << (j + l); if(std::is_same() || std::is_same()) { out << " " << std::real(v) << " " << std::imag(v); } else { out << " " << v; } out << std::endl; } } break; } case rocsparse_direction_column: { for(J k = 0; k < block_dim_row_; ++k) { for(J l = 0; l < block_dim_column_; ++l) { auto v = val_[at * block_dim_row_ * block_dim_column_ + block_dim_row_ * l + k]; out << (i + k) << " " << (j + l); if(std::is_same() || std::is_same()) { out << " " << std::real(v) << " " << std::imag(v); } else { out << " " << v; } out << std::endl; } } break; } } } } out.close(); return rocsparse_status_success; } case rocsparse_direction_column: { for(J jb = 0; jb < nb_; ++jb) { I j = jb * block_dim_column_; for(I at = ptr_[jb] - base_; at < ptr_[jb + 1] - base_; ++at) { J i = (ind_[at] - base_) * block_dim_row_; switch(dirb_) { case rocsparse_direction_row: { for(J k = 0; k < block_dim_row_; ++k) { for(J l = 0; l < block_dim_column_; ++l) { auto v = val_[at * block_dim_row_ * block_dim_column_ + block_dim_column_ * k + l]; out << (i + k) << " " << (j + l); if(std::is_same() || std::is_same()) { out << " " << std::real(v) << " " << std::imag(v); } else { out 
<< " " << v; } out << std::endl; } } break; } case rocsparse_direction_column: { for(J k = 0; k < block_dim_row_; ++k) { for(J l = 0; l < block_dim_column_; ++l) { auto v = val_[at * block_dim_row_ * block_dim_column_ + block_dim_row_ * l + k]; out << (i + k) << " " << (j + l); if(std::is_same() || std::is_same()) { out << " " << std::real(v) << " " << std::imag(v); } else { out << " " << v; } out << std::endl; } } break; } } } } out.close(); return rocsparse_status_success; } } return rocsparse_status_invalid_value; std::cerr << "rocsparse_exporter_matrixmarket, gebsx not supported." << std::endl; return rocsparse_status_not_implemented; } template rocsparse_status rocsparse_exporter_matrixmarket::write_dense_vector(I nmemb_, const T* __restrict__ x_, I incx_) { std::ofstream out(this->m_filename); if(!out.is_open()) { return rocsparse_status_internal_error; } if(std::is_same() || std::is_same()) { out.precision(15); out.setf(std::ios::scientific); } else { out.precision(7); out.setf(std::ios::scientific); } out << "%%MatrixMarket matrix array "; if(std::is_same() || std::is_same()) out << "complex"; else out << "real"; out << " general" << std::endl; out << nmemb_ << " 1" << std::endl; for(I i = 0; i < nmemb_; ++i) { if(std::is_same() || std::is_same()) { out << std::real(x_[i * incx_]) << " " << std::imag(x_[i * incx_]) << std::endl; } else { out << x_[i * incx_] << std::endl; } } out.close(); return rocsparse_status_success; } template rocsparse_status rocsparse_exporter_matrixmarket::write_dense_matrix( rocsparse_order order_, I m_, I n_, const T* __restrict__ x_, I ld_) { std::ofstream out(this->m_filename); if(!out.is_open()) { return rocsparse_status_internal_error; } if(std::is_same() || std::is_same()) { out.precision(15); out.setf(std::ios::scientific); } else { out.precision(7); out.setf(std::ios::scientific); } out << "%%MatrixMarket matrix array "; if(std::is_same() || std::is_same()) out << "complex"; else out << "real"; out << " general" << 
std::endl; out << m_ << " " << n_ << std::endl; switch(order_) { case rocsparse_order_row: { for(I i = 0; i < m_; ++i) { for(I j = 0; j < n_; ++j) { if(std::is_same() || std::is_same()) out << " " << std::real(x_[i * ld_ + j]) << " " << std::imag(x_[i * ld_ + j]); else out << " " << x_[i * ld_ + j]; } out << std::endl; } out.close(); return rocsparse_status_success; } case rocsparse_order_column: { for(I i = 0; i < m_; ++i) { for(I j = 0; j < n_; ++j) { if(std::is_same() || std::is_same()) out << " " << std::real(x_[j * ld_ + i]) << " " << std::imag(x_[j * ld_ + i]); else out << " " << x_[j * ld_ + i]; } out << std::endl; } out.close(); return rocsparse_status_success; } } return rocsparse_status_invalid_value; } template rocsparse_status rocsparse_exporter_matrixmarket::write_sparse_coo(I m_, I n_, I nnz_, const I* __restrict__ row_ind_, const I* __restrict__ col_ind_, const T* __restrict__ val_, rocsparse_index_base base_) { std::ofstream out(this->m_filename); if(!out.is_open()) { return rocsparse_status_internal_error; } if(std::is_same() || std::is_same()) { out.precision(15); out.setf(std::ios::scientific); } else { out.precision(7); out.setf(std::ios::scientific); } out << "%%MatrixMarket matrix coordinate "; if(std::is_same() || std::is_same()) out << "complex"; else out << "real"; out << " general" << std::endl; out << m_ << " " << n_ << " " << nnz_ << std::endl; for(I i = 0; i < nnz_; ++i) { out << ((row_ind_[i] - base_) + 1) << " " << ((col_ind_[i] - base_) + 1); if(std::is_same() || std::is_same()) { out << " " << std::real(val_[i]) << " " << std::imag(val_[i]) << std::endl; } else { out << " " << val_[i] << std::endl; } } out.close(); return rocsparse_status_success; } #define INSTANTIATE_TIJ(T, I, J) \ template rocsparse_status rocsparse_exporter_matrixmarket::write_sparse_csx( \ rocsparse_direction, \ J, \ J, \ I, \ const I* __restrict__, \ const J* __restrict__, \ const T* __restrict__, \ rocsparse_index_base); \ template rocsparse_status 
rocsparse_exporter_matrixmarket::write_sparse_gebsx( \ rocsparse_direction, \ rocsparse_direction, \ J, \ J, \ I, \ J, \ J, \ const I* __restrict__, \ const J* __restrict__, \ const T* __restrict__, \ rocsparse_index_base) #define INSTANTIATE_TI(T, I) \ template rocsparse_status rocsparse_exporter_matrixmarket::write_dense_vector( \ I, const T* __restrict__, I); \ template rocsparse_status rocsparse_exporter_matrixmarket::write_dense_matrix( \ rocsparse_order, I, I, const T* __restrict__, I); \ template rocsparse_status rocsparse_exporter_matrixmarket::write_sparse_coo( \ I, \ I, \ I, \ const I* __restrict__, \ const I* __restrict__, \ const T* __restrict__, \ rocsparse_index_base) INSTANTIATE_TIJ(float, int32_t, int32_t); INSTANTIATE_TIJ(float, int64_t, int32_t); INSTANTIATE_TIJ(float, int64_t, int64_t); INSTANTIATE_TIJ(double, int32_t, int32_t); INSTANTIATE_TIJ(double, int64_t, int32_t); INSTANTIATE_TIJ(double, int64_t, int64_t); INSTANTIATE_TIJ(rocsparse_float_complex, int32_t, int32_t); INSTANTIATE_TIJ(rocsparse_float_complex, int64_t, int32_t); INSTANTIATE_TIJ(rocsparse_float_complex, int64_t, int64_t); INSTANTIATE_TIJ(rocsparse_double_complex, int32_t, int32_t); INSTANTIATE_TIJ(rocsparse_double_complex, int64_t, int32_t); INSTANTIATE_TIJ(rocsparse_double_complex, int64_t, int64_t); INSTANTIATE_TI(float, int32_t); INSTANTIATE_TI(float, int64_t); INSTANTIATE_TI(double, int32_t); INSTANTIATE_TI(double, int64_t); INSTANTIATE_TI(rocsparse_float_complex, int32_t); INSTANTIATE_TI(rocsparse_float_complex, int64_t); INSTANTIATE_TI(rocsparse_double_complex, int32_t); INSTANTIATE_TI(rocsparse_double_complex, int64_t); rocSPARSE-rocm-5.7.1/clients/common/rocsparse_exporter_matrixmarket.hpp000066400000000000000000000075151447342677400264000ustar00rootroot00000000000000/*! \file */ /* ************************************************************************ * Copyright (C) 2022 Advanced Micro Devices, Inc. All rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
*
 * ************************************************************************ */

#pragma once
#ifndef ROCSPARSE_EXPORTER_MATRIXMARKET_HPP
#define ROCSPARSE_EXPORTER_MATRIXMARKET_HPP

#include "rocsparse_exporter.hpp"

//
// Exporter writing matrices and vectors to a MatrixMarket file.
//
// Every write_* method receives the data to export and returns a
// rocsparse_status; the destination is the file name given at construction.
//
// NOTE(review): in this copy of the file the template parameter lists after
// each `template` keyword (and possibly a template argument of the base class)
// are missing. Restore them from upstream before compiling.
//
class rocsparse_exporter_matrixmarket : public rocsparse_exporter
{
protected:
    // Name of the output file.
    std::string m_filename{};

public:
    ~rocsparse_exporter_matrixmarket();

    // Build an exporter writing to the file filename_.
    rocsparse_exporter_matrixmarket(const std::string& filename_);

    // Export a sparse matrix given by offsets (ptr), indices (ind) and values (val);
    // dir selects between row and column compressed storage.
    template
    rocsparse_status write_sparse_csx(rocsparse_direction dir,
                                      J                   m,
                                      J                   n,
                                      I                   nnz,
                                      const I* __restrict__ ptr,
                                      const J* __restrict__ ind,
                                      const T* __restrict__ val,
                                      rocsparse_index_base base);

    // Export a sparse matrix stored by blocks of block_dim_row x block_dim_column
    // values; dirb is the direction inside a block.
    template
    rocsparse_status write_sparse_gebsx(rocsparse_direction dir,
                                        rocsparse_direction dirb,
                                        J                   mb,
                                        J                   nb,
                                        I                   nnzb,
                                        J                   block_dim_row,
                                        J                   block_dim_column,
                                        const I* __restrict__ ptr,
                                        const J* __restrict__ ind,
                                        const T* __restrict__ val,
                                        rocsparse_index_base base);

    // Export a sparse matrix given by coordinates (row_ind, col_ind) and values (val).
    template
    rocsparse_status write_sparse_coo(I m,
                                      I n,
                                      I nnz,
                                      const I* __restrict__ row_ind,
                                      const I* __restrict__ col_ind,
                                      const T* __restrict__ val,
                                      rocsparse_index_base base);

    // Export a dense vector of size values read from x with stride incx.
    template
    rocsparse_status write_dense_vector(I size, const T* __restrict__ x, I incx);

    // Export an m x n dense matrix stored in x with leading dimension ld.
    template
    rocsparse_status write_dense_matrix(rocsparse_order order,
                                        I               m,
                                        I               n,
                                        const T* __restrict__ x,
                                        I ld);
};

#endif // HEADER
rocSPARSE-rocm-5.7.1/clients/common/rocsparse_exporter_rocalution.cpp000066400000000000000000000331701447342677400260360ustar00rootroot00000000000000/*! \file */
/* ************************************************************************
 * Copyright (C) 2022 Advanced Micro Devices, Inc. All rights Reserved.
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * * ************************************************************************ */ #include "rocsparse_exporter_rocalution.hpp" template rocsparse_status rocsparse_type_conversion(const X& x, Y& y); rocsparse_exporter_rocalution::~rocsparse_exporter_rocalution() { const char* env = getenv("GTEST_LISTENER"); if(!env || strcmp(env, "NO_PASS_LINE_IN_LOG")) { std::cout << "Export done." << std::endl; } } rocsparse_exporter_rocalution::rocsparse_exporter_rocalution(const std::string& filename_) : m_filename(filename_) { const char* env = getenv("GTEST_LISTENER"); if(!env || strcmp(env, "NO_PASS_LINE_IN_LOG")) { std::cout << "Opening file '" << this->m_filename << "' ... 
" << std::endl; } } template rocsparse_status rocalution_write_sparse_csx( const char* filename, int m, int n, int nnz, const int* ptr, const int* col, const T* val) { std::ofstream out(filename, std::ios::out | std::ios::binary); if(!out.is_open()) { return rocsparse_status_internal_error; } // Header out << "#rocALUTION binary csr file" << std::endl; // rocALUTION version int version = 10602; out.write((char*)&version, sizeof(int)); // Data out.write((char*)&m, sizeof(int)); out.write((char*)&n, sizeof(int)); out.write((char*)&nnz, sizeof(int)); out.write((char*)ptr, (m + 1) * sizeof(int)); out.write((char*)col, nnz * sizeof(int)); out.write((char*)val, nnz * sizeof(T)); out.close(); return rocsparse_status_success; } template void convert_array(int nnz, const void* data, void* mem) { memcpy(mem, data, sizeof(T) * nnz); } template <> void convert_array(int nnz, const void* data, void* mem) { rocsparse_double_complex* pmem = (rocsparse_double_complex*)mem; const rocsparse_float_complex* pdata = (const rocsparse_float_complex*)data; for(int i = 0; i < nnz; ++i) { pmem[i] = rocsparse_double_complex(std::real(pdata[i]), std::imag(pdata[i])); } } template <> void convert_array(int nnz, const void* data, void* mem) { double* pmem = (double*)mem; const float* pdata = (const float*)data; for(int i = 0; i < nnz; ++i) { pmem[i] = pdata[i]; } } template rocsparse_status rocsparse_exporter_rocalution::write_sparse_csx(rocsparse_direction dir_, J m_, J n_, I nnz_, const I* __restrict__ ptr_, const J* __restrict__ ind_, const T* __restrict__ val_, rocsparse_index_base base_) { if(dir_ != rocsparse_direction_row) { return rocsparse_status_not_implemented; } int m; int n; int nnz; rocsparse_status status; status = rocsparse_type_conversion(m_, m); if(status != rocsparse_status_success) { return status; } status = rocsparse_type_conversion(n_, n); if(status != rocsparse_status_success) { return status; } status = rocsparse_type_conversion(nnz_, nnz); if(status != 
rocsparse_status_success) { return status; } const int* ptr = nullptr; const int* ind = nullptr; const double* val = nullptr; int* ptr_mem = nullptr; int* ind_mem = nullptr; double* val_mem = nullptr; static constexpr bool ptr_same = std::is_same(); static constexpr bool ind_same = std::is_same(); static constexpr bool val_same = std::is_same() || std::is_same(); bool is_T_complex = (std::is_same() || std::is_same()); ptr_mem = nullptr; if(!ptr_same && (base_ != rocsparse_index_base_zero)) { rocsparse_hipHostMalloc(&ptr_mem, sizeof(int) * (m + 1)); } ind_mem = nullptr; if(!ind_same && (base_ != rocsparse_index_base_zero)) { rocsparse_hipHostMalloc(&ind_mem, sizeof(int) * nnz); } val_mem = nullptr; if(!val_same) { rocsparse_hipHostMalloc(&val_mem, sizeof(double) * (is_T_complex ? (2 * nnz) : nnz)); } ptr = (ptr_same || (base_ == rocsparse_index_base_zero)) ? ((const int*)ptr_) : ptr_mem; ind = (ind_same || (base_ == rocsparse_index_base_zero)) ? ((const int*)ind_) : ind_mem; val = (val_same) ? 
((const double*)val_) : val_mem; if(ptr_mem != nullptr) { for(int i = 0; i < m + 1; ++i) { status = rocsparse_type_conversion(ptr_[i], ptr_mem[i]); if(status != rocsparse_status_success) { break; } } if(base_ == rocsparse_index_base_one) { for(int i = 0; i < m + 1; ++i) { ptr_mem[i] = ptr_mem[i] - 1; } } if(status != rocsparse_status_success) { return status; } } if(ind_mem != nullptr) { for(int i = 0; i < nnz; ++i) { status = rocsparse_type_conversion(ind_[i], ind_mem[i]); if(status != rocsparse_status_success) { break; } } if(status != rocsparse_status_success) { return status; } if(base_ == rocsparse_index_base_one) { for(int i = 0; i < nnz; ++i) { ind_mem[i] = ind_mem[i] - 1; } } } if(val_mem != nullptr) { convert_array(nnz, (const void*)val_, (void*)val_mem); } if(status != rocsparse_status_success) { return status; } status = rocalution_write_sparse_csx(this->m_filename.c_str(), m, n, nnz, ptr, ind, val); if(val_mem != nullptr) { rocsparse_hipFree(val_mem); val_mem = nullptr; } if(ind_mem != nullptr) { rocsparse_hipFree(ind_mem); ind_mem = nullptr; } if(ptr_mem != nullptr) { rocsparse_hipFree(ptr_mem); ptr_mem = nullptr; } return status; } template rocsparse_status rocsparse_exporter_rocalution::write_sparse_gebsx(rocsparse_direction dir_, rocsparse_direction dirb_, J mb_, J nb_, I nnzb_, J block_dim_row_, J block_dim_column_, const I* __restrict__ ptr_, const J* __restrict__ ind_, const T* __restrict__ val_, rocsparse_index_base base_) { return rocsparse_status_not_implemented; } template rocsparse_status rocsparse_exporter_rocalution::write_dense_vector(I nmemb_, const T* __restrict__ x_, I incx_) { return rocsparse_status_not_implemented; } template rocsparse_status rocsparse_exporter_rocalution::write_dense_matrix( rocsparse_order order_, I m_, I n_, const T* __restrict__ x_, I ld_) { return rocsparse_status_not_implemented; } template rocsparse_status rocsparse_exporter_rocalution::write_sparse_coo(I m_, I n_, I nnz_, const I* __restrict__ row_ind_, 
const I* __restrict__ col_ind_, const T* __restrict__ val_, rocsparse_index_base base_) { return rocsparse_status_not_implemented; } #define INSTANTIATE_TIJ(T, I, J) \ template rocsparse_status rocsparse_exporter_rocalution::write_sparse_csx( \ rocsparse_direction, \ J, \ J, \ I, \ const I* __restrict__, \ const J* __restrict__, \ const T* __restrict__, \ rocsparse_index_base); \ template rocsparse_status rocsparse_exporter_rocalution::write_sparse_gebsx( \ rocsparse_direction, \ rocsparse_direction, \ J, \ J, \ I, \ J, \ J, \ const I* __restrict__, \ const J* __restrict__, \ const T* __restrict__, \ rocsparse_index_base) #define INSTANTIATE_TI(T, I) \ template rocsparse_status rocsparse_exporter_rocalution::write_dense_vector( \ I, const T* __restrict__, I); \ template rocsparse_status rocsparse_exporter_rocalution::write_dense_matrix( \ rocsparse_order, I, I, const T* __restrict__, I); \ template rocsparse_status rocsparse_exporter_rocalution::write_sparse_coo( \ I, \ I, \ I, \ const I* __restrict__, \ const I* __restrict__, \ const T* __restrict__, \ rocsparse_index_base) INSTANTIATE_TIJ(float, int32_t, int32_t); INSTANTIATE_TIJ(float, int64_t, int32_t); INSTANTIATE_TIJ(float, int64_t, int64_t); INSTANTIATE_TIJ(double, int32_t, int32_t); INSTANTIATE_TIJ(double, int64_t, int32_t); INSTANTIATE_TIJ(double, int64_t, int64_t); INSTANTIATE_TIJ(rocsparse_float_complex, int32_t, int32_t); INSTANTIATE_TIJ(rocsparse_float_complex, int64_t, int32_t); INSTANTIATE_TIJ(rocsparse_float_complex, int64_t, int64_t); INSTANTIATE_TIJ(rocsparse_double_complex, int32_t, int32_t); INSTANTIATE_TIJ(rocsparse_double_complex, int64_t, int32_t); INSTANTIATE_TIJ(rocsparse_double_complex, int64_t, int64_t); INSTANTIATE_TI(float, int32_t); INSTANTIATE_TI(float, int64_t); INSTANTIATE_TI(double, int32_t); INSTANTIATE_TI(double, int64_t); INSTANTIATE_TI(rocsparse_float_complex, int32_t); INSTANTIATE_TI(rocsparse_float_complex, int64_t); INSTANTIATE_TI(rocsparse_double_complex, int32_t); 
INSTANTIATE_TI(rocsparse_double_complex, int64_t); rocSPARSE-rocm-5.7.1/clients/common/rocsparse_exporter_rocalution.hpp000066400000000000000000000075011447342677400260420ustar00rootroot00000000000000/*! \file */ /* ************************************************************************ * Copyright (C) 2022 Advanced Micro Devices, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
* * ************************************************************************ */ #pragma once #ifndef ROCSPARSE_EXPORTER_ROCALUTION_HPP #define ROCSPARSE_EXPORTER_ROCALUTION_HPP #include "rocsparse_exporter.hpp" class rocsparse_exporter_rocalution : public rocsparse_exporter { protected: std::string m_filename{}; public: ~rocsparse_exporter_rocalution(); rocsparse_exporter_rocalution(const std::string& filename_); template rocsparse_status write_sparse_csx(rocsparse_direction dir, J m, J n, I nnz, const I* __restrict__ ptr, const J* __restrict__ ind, const T* __restrict__ val, rocsparse_index_base base); template rocsparse_status write_sparse_gebsx(rocsparse_direction dir, rocsparse_direction dirb, J mb, J nb, I nnzb, J block_dim_row, J block_dim_column, const I* __restrict__ ptr, const J* __restrict__ ind, const T* __restrict__ val, rocsparse_index_base base); template rocsparse_status write_sparse_coo(I m, I n, I nnz, const I* __restrict__ row_ind, const I* __restrict__ col_ind, const T* __restrict__ val, rocsparse_index_base base); template rocsparse_status write_dense_vector(I size, const T* __restrict__ x, I incx); template rocsparse_status write_dense_matrix(rocsparse_order order, I m, I n, const T* __restrict__ x, I ld); }; #endif // HEADER rocSPARSE-rocm-5.7.1/clients/common/rocsparse_exporter_rocsparseio.cpp000066400000000000000000000450111447342677400262050ustar00rootroot00000000000000/*! \file */ /* ************************************************************************ * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
* * ************************************************************************ */ #include "rocsparse_exporter_rocsparseio.hpp" template rocsparse_status rocsparse_type_conversion(const X& x, Y& y); #ifdef ROCSPARSEIO template inline rocsparse_status rocsparse2rocsparseio_convert(const X& x, Y& y); template <> inline rocsparse_status rocsparse2rocsparseio_convert(const rocsparse_order& x, rocsparseio_order& y) { switch(x) { case rocsparse_order_row: { y = rocsparseio_order_row; return rocsparse_status_success; } case rocsparse_order_column: { y = rocsparseio_order_column; return rocsparse_status_success; } } return rocsparse_status_invalid_value; } template <> inline rocsparse_status rocsparse2rocsparseio_convert(const rocsparse_direction& x, rocsparseio_direction& y) { switch(x) { case rocsparse_direction_row: { y = rocsparseio_direction_row; return rocsparse_status_success; } case rocsparse_direction_column: { y = rocsparseio_direction_column; return rocsparse_status_success; } } return rocsparse_status_invalid_value; } template <> inline rocsparse_status rocsparse2rocsparseio_convert(const rocsparse_index_base& x, rocsparseio_index_base& y) { switch(x) { case rocsparse_index_base_zero: { y = rocsparseio_index_base_zero; return rocsparse_status_success; } case rocsparse_index_base_one: { y = rocsparseio_index_base_one; return rocsparse_status_success; } } return rocsparse_status_invalid_value; } template inline rocsparseio_type rocsparseio_type_convert(); template <> inline rocsparseio_type rocsparseio_type_convert() { return rocsparseio_type_int32; }; template <> inline rocsparseio_type rocsparseio_type_convert() { return rocsparseio_type_int64; }; template <> inline rocsparseio_type rocsparseio_type_convert() { return rocsparseio_type_float32; }; template <> inline rocsparseio_type rocsparseio_type_convert() { return rocsparseio_type_float64; }; template <> inline rocsparseio_type rocsparseio_type_convert() { return rocsparseio_type_complex32; }; template <> 
inline rocsparseio_type rocsparseio_type_convert() { return rocsparseio_type_complex64; }; #endif rocsparse_exporter_rocsparseio::~rocsparse_exporter_rocsparseio() { #ifdef ROCSPARSEIO const char* env = getenv("GTEST_LISTENER"); if(!env || strcmp(env, "NO_PASS_LINE_IN_LOG")) { std::cout << "Export done." << std::endl; } auto istatus = rocsparseio_close(this->m_handle); if(istatus != rocsparseio_status_success) { } #endif } rocsparse_exporter_rocsparseio::rocsparse_exporter_rocsparseio(const std::string& filename_) : m_filename(filename_) { #ifdef ROCSPARSEIO const char* env = getenv("GTEST_LISTENER"); if(!env || strcmp(env, "NO_PASS_LINE_IN_LOG")) { std::cout << "Opening file '" << this->m_filename << "' ... " << std::endl; } rocsparseio_status istatus; istatus = rocsparseio_open(&this->m_handle, rocsparseio_rwmode_write, this->m_filename.c_str()); if(istatus != rocsparseio_status_success) { std::cerr << "Problem with rocsparseio_open" << std::endl; throw rocsparse_status_internal_error; } #else throw rocsparse_status_not_implemented; #endif } template rocsparse_status rocsparse_exporter_rocsparseio::write_sparse_csx(rocsparse_direction dir_, J m_, J n_, I nnz_, const I* __restrict__ ptr_, const J* __restrict__ ind_, const T* __restrict__ val_, rocsparse_index_base base_) { #ifdef ROCSPARSEIO const rocsparseio_type ptr_type = rocsparseio_type_convert(); const rocsparseio_type ind_type = rocsparseio_type_convert(); const rocsparseio_type val_type = rocsparseio_type_convert(); rocsparseio_direction dir; size_t m; size_t n; size_t nnz; rocsparseio_index_base base; rocsparse_status status; status = rocsparse2rocsparseio_convert(dir_, dir); if(status != rocsparse_status_success) { return status; } status = rocsparse2rocsparseio_convert(base_, base); if(status != rocsparse_status_success) { return status; } status = rocsparse_type_conversion(m_, m); if(status != rocsparse_status_success) { return status; } status = rocsparse_type_conversion(n_, n); if(status != 
rocsparse_status_success) { return status; } status = rocsparse_type_conversion(nnz_, nnz); if(status != rocsparse_status_success) { return status; } rocsparseio_status io_status = rocsparseio_write_sparse_csx( this->m_handle, dir, m, n, nnz, ptr_type, ptr_, ind_type, ind_, val_type, val_, base); if(io_status != rocsparseio_status_success) { return rocsparse_status_internal_error; } return rocsparse_status_success; #else return rocsparse_status_not_implemented; #endif } template rocsparse_status rocsparse_exporter_rocsparseio::write_sparse_gebsx(rocsparse_direction dir_, rocsparse_direction dirb_, J mb_, J nb_, I nnzb_, J block_dim_row_, J block_dim_column_, const I* __restrict__ ptr_, const J* __restrict__ ind_, const T* __restrict__ val_, rocsparse_index_base base_) { #ifdef ROCSPARSEIO const rocsparseio_type ptr_type = rocsparseio_type_convert(); const rocsparseio_type ind_type = rocsparseio_type_convert(); const rocsparseio_type val_type = rocsparseio_type_convert(); rocsparseio_direction dir; rocsparseio_direction dirb; size_t mb; size_t nb; size_t nnzb; size_t block_dim_row; size_t block_dim_column; rocsparseio_index_base base; rocsparse_status status; status = rocsparse2rocsparseio_convert(dir_, dir); if(status != rocsparse_status_success) { return status; } status = rocsparse2rocsparseio_convert(dirb_, dirb); if(status != rocsparse_status_success) { return status; } status = rocsparse2rocsparseio_convert(base_, base); if(status != rocsparse_status_success) { return status; } status = rocsparse_type_conversion(mb_, mb); if(status != rocsparse_status_success) { return status; } status = rocsparse_type_conversion(nb_, nb); if(status != rocsparse_status_success) { return status; } status = rocsparse_type_conversion(nnzb_, nnzb); if(status != rocsparse_status_success) { return status; } status = rocsparse_type_conversion(block_dim_row_, block_dim_row); if(status != rocsparse_status_success) { return status; } status = rocsparse_type_conversion(block_dim_column_, 
block_dim_column); if(status != rocsparse_status_success) { return status; } rocsparseio_status io_status = rocsparseio_write_sparse_gebsx(this->m_handle, dir, dirb, mb, nb, nnzb, block_dim_row, block_dim_column, ptr_type, ptr_, ind_type, ind_, val_type, val_, base); if(io_status != rocsparseio_status_success) { return rocsparse_status_internal_error; } return rocsparse_status_success; #else return rocsparse_status_not_implemented; #endif } template rocsparse_status rocsparse_exporter_rocsparseio::write_dense_vector(I nmemb_, const T* __restrict__ x_, I incx_) { #ifdef ROCSPARSEIO const rocsparseio_type val_type = rocsparseio_type_convert(); size_t nmemb, incx; rocsparse_status status; status = rocsparse_type_conversion(nmemb_, nmemb); if(status != rocsparse_status_success) { return status; } status = rocsparse_type_conversion(incx_, incx); if(status != rocsparse_status_success) { return status; } rocsparseio_status io_status = rocsparseio_write_dense_vector(this->m_handle, val_type, nmemb, x_, incx); if(io_status != rocsparseio_status_success) { return rocsparse_status_internal_error; } return rocsparse_status_success; #else return rocsparse_status_not_implemented; #endif } template rocsparse_status rocsparse_exporter_rocsparseio::write_dense_matrix( rocsparse_order order_, I m_, I n_, const T* __restrict__ x_, I ld_) { #ifdef ROCSPARSEIO rocsparseio_order order; size_t m, n, ld; rocsparse_status status; const rocsparseio_type val_type = rocsparseio_type_convert(); status = rocsparse2rocsparseio_convert(order_, order); if(status != rocsparse_status_success) { return status; } status = rocsparse_type_conversion(m_, m); if(status != rocsparse_status_success) { return status; } status = rocsparse_type_conversion(n_, n); if(status != rocsparse_status_success) { return status; } status = rocsparse_type_conversion(ld_, ld); if(status != rocsparse_status_success) { return status; } rocsparseio_status io_status = rocsparseio_write_dense_matrix(this->m_handle, order, m, n, 
val_type, x_, ld); if(io_status != rocsparseio_status_success) { return rocsparse_status_internal_error; } return rocsparse_status_success; #else return rocsparse_status_not_implemented; #endif } template rocsparse_status rocsparse_exporter_rocsparseio::write_sparse_coo(I m_, I n_, I nnz_, const I* __restrict__ row_ind_, const I* __restrict__ col_ind_, const T* __restrict__ val_, rocsparse_index_base base_) { #ifdef ROCSPARSEIO const rocsparseio_type ind_type = rocsparseio_type_convert(); const rocsparseio_type val_type = rocsparseio_type_convert(); size_t m, n, nnz; rocsparseio_index_base base; rocsparse_status status; status = rocsparse2rocsparseio_convert(base_, base); if(status != rocsparse_status_success) { return status; } status = rocsparse_type_conversion(m_, m); if(status != rocsparse_status_success) { return status; } status = rocsparse_type_conversion(n_, n); if(status != rocsparse_status_success) { return status; } status = rocsparse_type_conversion(nnz_, nnz); if(status != rocsparse_status_success) { return status; } rocsparseio_status io_status = rocsparseio_write_sparse_coo( this->m_handle, m, n, nnz, ind_type, row_ind_, ind_type, col_ind_, val_type, val_, base); if(io_status != rocsparseio_status_success) { return rocsparse_status_internal_error; } return rocsparse_status_success; #else return rocsparse_status_not_implemented; #endif } #define INSTANTIATE_TIJ(T, I, J) \ template rocsparse_status rocsparse_exporter_rocsparseio::write_sparse_csx( \ rocsparse_direction, \ J, \ J, \ I, \ const I* __restrict__, \ const J* __restrict__, \ const T* __restrict__, \ rocsparse_index_base); \ template rocsparse_status rocsparse_exporter_rocsparseio::write_sparse_gebsx( \ rocsparse_direction, \ rocsparse_direction, \ J, \ J, \ I, \ J, \ J, \ const I* __restrict__, \ const J* __restrict__, \ const T* __restrict__, \ rocsparse_index_base) #define INSTANTIATE_TI(T, I) \ template rocsparse_status rocsparse_exporter_rocsparseio::write_dense_vector( \ I, const T* 
__restrict__, I); \ template rocsparse_status rocsparse_exporter_rocsparseio::write_dense_matrix( \ rocsparse_order, I, I, const T* __restrict__, I); \ template rocsparse_status rocsparse_exporter_rocsparseio::write_sparse_coo( \ I, \ I, \ I, \ const I* __restrict__, \ const I* __restrict__, \ const T* __restrict__, \ rocsparse_index_base) INSTANTIATE_TIJ(float, int32_t, int32_t); INSTANTIATE_TIJ(float, int64_t, int32_t); INSTANTIATE_TIJ(float, int64_t, int64_t); INSTANTIATE_TIJ(double, int32_t, int32_t); INSTANTIATE_TIJ(double, int64_t, int32_t); INSTANTIATE_TIJ(double, int64_t, int64_t); INSTANTIATE_TIJ(rocsparse_float_complex, int32_t, int32_t); INSTANTIATE_TIJ(rocsparse_float_complex, int64_t, int32_t); INSTANTIATE_TIJ(rocsparse_float_complex, int64_t, int64_t); INSTANTIATE_TIJ(rocsparse_double_complex, int32_t, int32_t); INSTANTIATE_TIJ(rocsparse_double_complex, int64_t, int32_t); INSTANTIATE_TIJ(rocsparse_double_complex, int64_t, int64_t); INSTANTIATE_TI(float, int32_t); INSTANTIATE_TI(float, int64_t); INSTANTIATE_TI(double, int32_t); INSTANTIATE_TI(double, int64_t); INSTANTIATE_TI(rocsparse_float_complex, int32_t); INSTANTIATE_TI(rocsparse_float_complex, int64_t); INSTANTIATE_TI(rocsparse_double_complex, int32_t); INSTANTIATE_TI(rocsparse_double_complex, int64_t); rocSPARSE-rocm-5.7.1/clients/common/rocsparse_exporter_rocsparseio.hpp000066400000000000000000000077551447342677400262270ustar00rootroot00000000000000/*! \file */ /* ************************************************************************ * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
* * ************************************************************************ */ #pragma once #ifndef ROCSPARSE_EXPORTER_ROCSPARSEIO_HPP #define ROCSPARSE_EXPORTER_ROCSPARSEIO_HPP #include "rocsparse_exporter.hpp" #ifdef ROCSPARSEIO #include "rocsparseio.h" #endif class rocsparse_exporter_rocsparseio : public rocsparse_exporter { protected: std::string m_filename{}; #ifdef ROCSPARSEIO rocsparseio_handle m_handle{}; #endif public: ~rocsparse_exporter_rocsparseio(); using IMPL = rocsparse_exporter_rocsparseio; rocsparse_exporter_rocsparseio(const std::string& filename_); template rocsparse_status write_sparse_csx(rocsparse_direction dir, J m, J n, I nnz, const I* __restrict__ ptr, const J* __restrict__ ind, const T* __restrict__ val, rocsparse_index_base base); template rocsparse_status write_sparse_gebsx(rocsparse_direction dir, rocsparse_direction dirb, J mb, J nb, I nnzb, J block_dim_row, J block_dim_column, const I* __restrict__ ptr, const J* __restrict__ ind, const T* __restrict__ val, rocsparse_index_base base); template rocsparse_status write_sparse_coo(I m, I n, I nnz, const I* __restrict__ row_ind, const I* __restrict__ col_ind, const T* __restrict__ val, rocsparse_index_base base); template rocsparse_status write_dense_vector(I size, const T* __restrict__ x, I incx); template rocsparse_status write_dense_matrix(rocsparse_order order, I m, I n, const T* __restrict__ x, I ld); }; #endif // HEADER rocSPARSE-rocm-5.7.1/clients/common/rocsparse_gentest.py000077500000000000000000000433011447342677400232460ustar00rootroot00000000000000#!/usr/bin/env python3 # ######################################################################## # Copyright (C) 2019-2022 Advanced Micro Devices, Inc. All rights Reserved. 
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # # ######################################################################## """Expand rocSPARSE YAML test data file into binary Arguments records""" import re import sys import os import argparse import ctypes import glob from fnmatch import fnmatchcase try: # Import either the C or pure-Python YAML parser from yaml import CLoader as Loader except ImportError: from yaml import Loader import yaml # Regex for type names in the YAML file. Optional *nnn indicates array. 
TYPE_RE = re.compile(r'[a-z_A-Z]\w*(:?\s*\*\s*\d+)?$') # Regex for integer ranges A..B[..C] INT_RANGE_RE = re.compile(r'\s*(-?\d+)\s*\.\.\s*(-?\d+)\s*(?:\.\.\s*(-?\d+)\s*)?$') # Regex for include: YAML extension INCLUDE_RE = re.compile(r'include\s*:\s*(.*)') # Regex for complex types COMPLEX_RE = re.compile(r'f\d+_c$') args = {} testcases = set() datatypes = {} param = {} def main(): args.update(parse_args().__dict__) for doc in get_yaml_docs(): process_doc(doc) def process_doc(doc): """Process one document in the YAML file""" # Ignore empty documents if not doc or not doc.get('Tests'): return # Clear datatypes and params from previous documents datatypes.clear() param.clear() # Return dictionary of all known datatypes datatypes.update(get_datatypes(doc)) # Arguments structure corresponding to C/C++ structure param['Arguments'] = type('Arguments', (ctypes.Structure,), {'_fields_': get_arguments(doc)}) # Special names which get expanded as lists of arguments param['dict_lists_to_expand'] = doc.get('Dictionary lists to expand') or () # Lists which are not expanded param['lists_to_not_expand'] = doc.get('Lists to not expand') or () # Defaults defaults = doc.get('Defaults') or {} # Known Bugs param['known_bugs'] = doc.get('Known bugs') or [] # Functions param['Functions'] = doc.get('Functions') or {} # Instantiate all of the tests, starting with defaults for test in doc['Tests']: case = defaults.copy() case.update(test) generate(case, instantiate) def parse_args(): """Parse command-line arguments, returning input and output files""" parser = argparse.ArgumentParser(description=""" Expand rocSPARSE YAML test data file into binary Arguments records """) parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin) parser.add_argument('-o', '--out', dest='outfile', type=argparse.FileType('wb'), default=sys.stdout) parser.add_argument('-I', help="Add include path", action='append', dest='includes', default=[]) parser.add_argument('-t', 
'--template', type=argparse.FileType('r')) return parser.parse_args() def read_yaml_file(file): """Read the YAML file, processing include: lines as an extension""" file_dir = os.path.dirname(file.name) or os.getcwd() source = [] for line_no, line in enumerate(file, start=1): # Keep track of file names and line numbers for each line of YAML match = line.startswith('include') and INCLUDE_RE.match(line) if not match: source.append([line, file.name, line_no]) else: include_file = match.group(1) include_dirs = [file_dir] + args['includes'] for path in include_dirs: path = os.path.join(path, include_file) if os.path.exists(path): source.extend(read_yaml_file(open(path, 'r'))) break else: sys.exit("In file " + file.name + ", line " + str(line_no) + ", column " + str(match.start(1)+1) + ":\n" + line.rstrip() + "\n" + " " * match.start(1) + "^\nCannot open " + include_file + "\n\nInclude paths:\n" + "\n".join(include_dirs)) file.close() return source def get_yaml_docs(): """Parse the YAML file""" source = read_yaml_file(args['infile']) if args.get('template'): source = read_yaml_file(args['template']) + source source_str = ''.join([line[0] for line in source]) def mark_str(mark): line = source[mark.line] return("In file " + line[1] + ", line " + str(line[2]) + ", column " + str(mark.column + 1) + ":\n" + line[0].rstrip() + "\n" + ' ' * mark.column + "^\n") # We iterate through all of the documents to properly diagnose errors, # because the load_all generator does not handle exceptions correctly. 
docs = [] load = Loader(source_str) while load.check_data(): try: doc = load.get_data() except yaml.YAMLError as err: sys.exit((mark_str(err.problem_mark) if err.problem_mark else "") + (err.problem + "\n" if err.problem else "") + (err.note + "\n" if err.note else "")) else: docs.append(doc) return docs def get_datatypes(doc): """ Get datatypes from YAML doc""" dt = ctypes.__dict__.copy() for declaration in doc.get('Datatypes') or (): for name, decl in declaration.items(): if isinstance(decl, dict): # Create derived class type based on bases and attr entries dt[name] = type(name, tuple([eval(t, dt) for t in decl.get('bases') or () if TYPE_RE.match(t)] ), decl.get('attr') or {}) # Import class' attributes into the datatype namespace for subtype in decl.get('attr') or {}: if TYPE_RE.match(subtype): dt[subtype] = eval(name+'.'+subtype, dt) elif isinstance(decl, str) and TYPE_RE.match(decl): dt[name] = dt[decl] else: sys.exit("Unrecognized data type "+name+": "+repr(decl)) return dt def get_arguments(doc): """The kernel argument list, with argument names and types""" return [(var, eval(decl[var], datatypes)) for decl in doc.get('Arguments') or () if len(decl) == 1 for var in decl if TYPE_RE.match(decl[var])] def setdefaults(test): """Set default values for parameters""" # Do not put constant defaults here -- use rocsparse_common.yaml for that. # These are only for dynamic defaults # TODO: This should be ideally moved to YAML file, with eval'd expressions. 
if test['transA'] == 111 or test['transB'] == 111: test.setdefault('lda', 0) test.setdefault('ldb', 0) test.setdefault('ldc', 0) test.setdefault('ldd', 0) else: test.setdefault('lda', test['M'] if test['transA'] == 111 else test['K']) test.setdefault('ldb', test['K'] if test['transB'] == 111 else test['N']) test.setdefault('ldc', test['M']) test.setdefault('ldd', test['M']) def write_signature(out): """Write the signature used to verify binary file compatibility""" if 'signature_written' not in args: sig = 0 byt = bytearray("rocSPARSE", 'utf_8') byt.append(0) last_ofs = 0 for (name, ctype) in param['Arguments']._fields_: member = getattr(param['Arguments'], name) for i in range(0, member.offset - last_ofs): byt.append(0) for i in range(0, member.size): byt.append(sig ^ i) sig = (sig + 89) % 256 last_ofs = member.offset + member.size for i in range(0, ctypes.sizeof(param['Arguments']) - last_ofs): byt.append(0) byt.extend(bytes("ROCsparse", 'utf_8')) byt.append(0) out.write(byt) args['signature_written'] = True def write_test(test): """Write the test case out to the binary file if not seen already""" # For each argument declared in arguments, we generate a positional # argument in the Arguments constructor. For strings, we pass the # value of the string directly. For arrays, we unpack their contents # into the ctype array constructor and pass the ctype array. For # scalars, we coerce the string/numeric value into ctype. 
arg = [] for name, ctype in param['Arguments']._fields_: try: if issubclass(ctype, ctypes.Array): if issubclass(ctype._type_, ctypes.c_char): arg.append(bytes(test[name], 'utf_8')) else: arg.append(ctype(*test[name])) elif issubclass(ctype, ctypes.c_char): arg.append(bytes(test[name], 'utf_8')) else: arg.append(ctype(test[name])) except TypeError as err: sys.exit("TypeError: " + str(err) + " for " + name + ", which has type " + str(type(test[name])) + "\n") byt = bytes(param['Arguments'](*arg)) if byt not in testcases: testcases.add(byt) write_signature(args['outfile']) args['outfile'].write(byt) def instantiate(test): """Instantiate a given test case""" test = test.copy() # Any Arguments fields declared as enums enum_args = [decl[0] for decl in param['Arguments']._fields_ if decl[1].__module__ == '__main__'] try: setdefaults(test) # If no enum arguments are complex, clear alphai and betai for typename in enum_args: if COMPLEX_RE.match(test[typename]): break else: for name in ('alphai', 'betai', 'boostvali'): if name in test: test[name] = 0.0 # For enum arguments, replace name with value for typename in enum_args: test[typename] = datatypes[test[typename]] # Match known bugs if test['category'] not in ('known_bug', 'disabled'): for bug in param['known_bugs']: for key, value in bug.items(): if key not in test: break if key == 'function': if not fnmatchcase(test[key], value): break # For keys declared as enums, compare resulting values elif test[key] != (datatypes.get(value) if key in enum_args else value): break else: # All values specified in known bug match test case test['category'] = 'known_bug' break write_test(test) except KeyError as err: sys.exit("Undefined value " + str(err) + "\n" + str(test)) def generate(test, function): """Generate test combinations by iterating across lists recursively""" test = test.copy() # For specially named lists, they are expanded and merged into the test # argument list. 
When the list name is a dictionary of length 1, its pairs # indicate that the argument named by its key takes on values paired with # the argument named by its value, which is another dictionary list. We # process the value dictionaries' keys in alphabetic order, to ensure # deterministic test ordering. for argname in param['dict_lists_to_expand']: if type(argname) == dict: if len(argname) == 1: arg, target = list(argname.items())[0] if arg in test and type(test[arg]) == dict: pairs = sorted(list(test[arg].items()), key=lambda x: x[0]) for test[arg], test[target] in pairs: generate(test, function) return elif argname in test and type(test[argname]) in (tuple, list, dict): # Pop the list and iterate across it ilist = test.pop(argname) # For a bare dictionary, wrap it in a list and apply it once for item in [ilist] if type(ilist) == dict else ilist: try: case = test.copy() case.update(item) # original test merged with each item generate(case, function) except TypeError as err: sys.exit("TypeError: " + str(err) + " for " + argname + ", which has type " + str(type(item)) + "\nA name listed in \"Dictionary lists to expand\" " "must be a defined as a dictionary.\n") return for key in sorted(list(test)): # Integer arguments which are ranges (A..B[..C]) are expanded if type(test[key]) == str: match = INT_RANGE_RE.match(str(test[key])) if match: for test[key] in range(int(match.group(1)), int(match.group(2))+1, int(match.group(3) or 1)): generate(test, function) return # For sequence arguments, they are expanded into scalars elif (type(test[key]) in (tuple, list) and key not in param['lists_to_not_expand']): if key == "filename" and test[key] != "*": cleanlist=[] for test[key] in test[key]: # # Get the root path. # out_path = os.path.dirname(args['outfile'].name) + "/../matrices/" # # Get argument. 
# filename_arg = out_path + str(test[key]) # # Check if this is a valid argument # if ((not os.path.isdir(filename_arg)) and (not glob.glob(filename_arg)) and (not glob.glob(filename_arg + ".csr")) and (not glob.glob(filename_arg + ".bsr"))): print("skip unrecognized filename, directory or filename regular expression: '" + test[key] + "'") else: cleanlist.append(test[key]) if not cleanlist: return; test[key] = cleanlist for test[key] in test[key]: # # Get the root path. # out_path = os.path.dirname(args['outfile'].name) + "/../matrices/" # # Get argument. # filename_arg = out_path + str(test[key]) # # It is a directory. # if os.path.isdir(filename_arg): # # List the files. # names = glob.glob(filename_arg + "/*") for name in names: subpath=os.path.splitext(name.replace(out_path,""))[0] test[key]=[subpath] generate(test,function) else: # # Might be a regular expression # names = glob.glob(filename_arg) if not names: names = glob.glob(filename_arg + ".csr") if not names: names = glob.glob(filename_arg + ".bsr") generate(test,function) else: for name in names: subpath=os.path.splitext(name.replace(out_path,""))[0] test[key]=[subpath] generate(test,function) else: for test[key] in test[key]: generate(test,function) return # Replace typed function names with generic functions and types if 'rocsparse_function' in test: func = test.pop('rocsparse_function') if func in param['Functions']: test.update(param['Functions'][func]) else: test['function'] = func generate(test, function) return function(test) if __name__ == '__main__': main() rocSPARSE-rocm-5.7.1/clients/common/rocsparse_host.cpp000066400000000000000000016075201447342677400227130ustar00rootroot00000000000000/*! \file */ /* ************************************************************************ * Copyright (C) 2020-2023 Advanced Micro Devices, Inc. All rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * * ************************************************************************ */ #include "utility.hpp" #include #ifdef _OPENMP #include #endif // BSR indexing macros #define BSR_IND(j, bi, bj, dir) \ ((dir == rocsparse_direction_row) ? 
BSR_IND_R(j, bi, bj) : BSR_IND_C(j, bi, bj)) #define BSR_IND_R(j, bi, bj) (bsr_dim * bsr_dim * (j) + (bi)*bsr_dim + (bj)) #define BSR_IND_C(j, bi, bj) (bsr_dim * bsr_dim * (j) + (bi) + (bj)*bsr_dim) /* * =========================================================================== * level 1 SPARSE * =========================================================================== */ template void host_axpby( I size, I nnz, T alpha, const T* x_val, const I* x_ind, T beta, T* y, rocsparse_index_base base) { for(I i = 0; i < size; ++i) { y[i] *= beta; } for(I i = 0; i < nnz; ++i) { y[x_ind[i] - base] = std::fma(alpha, x_val[i], y[x_ind[i] - base]); } } template void host_doti( I nnz, const T* x_val, const I* x_ind, const T* y, T* result, rocsparse_index_base base) { *result = static_cast(0); for(I i = 0; i < nnz; ++i) { *result = std::fma(y[x_ind[i] - base], x_val[i], *result); } } template void host_dotci( I nnz, const T* x_val, const I* x_ind, const T* y, T* result, rocsparse_index_base base) { *result = static_cast(0); for(I i = 0; i < nnz; ++i) { *result = std::fma(rocsparse_conj(x_val[i]), y[x_ind[i] - base], *result); } } template void host_gthr(I nnz, const T* y, T* x_val, const I* x_ind, rocsparse_index_base base) { for(I i = 0; i < nnz; ++i) { x_val[i] = y[x_ind[i] - base]; } } template void host_gthrz( rocsparse_int nnz, T* y, T* x_val, const rocsparse_int* x_ind, rocsparse_index_base base) { for(rocsparse_int i = 0; i < nnz; ++i) { x_val[i] = y[x_ind[i] - base]; y[x_ind[i] - base] = static_cast(0); } } template void host_roti( I nnz, T* x_val, const I* x_ind, T* y, const T* c, const T* s, rocsparse_index_base base) { for(I i = 0; i < nnz; ++i) { I idx = x_ind[i] - base; T xs = x_val[i]; T ys = y[idx]; x_val[i] = *c * xs + *s * ys; y[idx] = *c * ys - *s * xs; } } template void host_sctr(I nnz, const T* x_val, const I* x_ind, T* y, rocsparse_index_base base) { for(I i = 0; i < nnz; ++i) { y[x_ind[i] - base] = x_val[i]; } } /* * 
=========================================================================== * level 2 SPARSE * =========================================================================== */ template void host_bsrmv(rocsparse_direction dir, rocsparse_operation trans, J mb, J nb, I nnzb, T alpha, const I* bsr_row_ptr, const J* bsr_col_ind, const A* bsr_val, J bsr_dim, const X* x, T beta, Y* y, rocsparse_index_base base) { // Quick return if(alpha == static_cast(0)) { if(beta != static_cast(1)) { for(J i = 0; i < mb * bsr_dim; ++i) { y[i] *= beta; } } return; } unsigned int WFSIZE; if(bsr_dim == 2) { I blocks_per_row = nnzb / mb; if(blocks_per_row < 8) { WFSIZE = 4; } else if(blocks_per_row < 16) { WFSIZE = 8; } else if(blocks_per_row < 32) { WFSIZE = 16; } else if(blocks_per_row < 64) { WFSIZE = 32; } else { WFSIZE = 64; } } else if(bsr_dim <= 8) { WFSIZE = 8; } else if(bsr_dim <= 16) { WFSIZE = 16; } else { WFSIZE = 32; } #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(J row = 0; row < mb; ++row) { I row_begin = bsr_row_ptr[row] - base; I row_end = bsr_row_ptr[row + 1] - base; if(bsr_dim == 2) { std::vector sum0(WFSIZE, static_cast(0)); std::vector sum1(WFSIZE, static_cast(0)); for(I j = row_begin; j < row_end; j += WFSIZE) { for(unsigned int k = 0; k < WFSIZE; ++k) { if(j + k < row_end) { J col = bsr_col_ind[j + k] - base; if(dir == rocsparse_direction_column) { sum0[k] = std::fma(static_cast(bsr_val[bsr_dim * bsr_dim * (j + k) + 0]), static_cast(x[col * bsr_dim + 0]), static_cast(sum0[k])); sum1[k] = std::fma(static_cast(bsr_val[bsr_dim * bsr_dim * (j + k) + 1]), static_cast(x[col * bsr_dim + 0]), static_cast(sum1[k])); sum0[k] = std::fma(static_cast(bsr_val[bsr_dim * bsr_dim * (j + k) + 2]), static_cast(x[col * bsr_dim + 1]), static_cast(sum0[k])); sum1[k] = std::fma(static_cast(bsr_val[bsr_dim * bsr_dim * (j + k) + 3]), static_cast(x[col * bsr_dim + 1]), static_cast(sum1[k])); } else { sum0[k] = std::fma(static_cast(bsr_val[bsr_dim * bsr_dim * (j + k) 
+ 0]), static_cast(x[col * bsr_dim + 0]), static_cast(sum0[k])); sum0[k] = std::fma(static_cast(bsr_val[bsr_dim * bsr_dim * (j + k) + 1]), static_cast(x[col * bsr_dim + 1]), static_cast(sum0[k])); sum1[k] = std::fma(static_cast(bsr_val[bsr_dim * bsr_dim * (j + k) + 2]), static_cast(x[col * bsr_dim + 0]), static_cast(sum1[k])); sum1[k] = std::fma(static_cast(bsr_val[bsr_dim * bsr_dim * (j + k) + 3]), static_cast(x[col * bsr_dim + 1]), static_cast(sum1[k])); } } } } for(unsigned int j = 1; j < WFSIZE; j <<= 1) { for(unsigned int k = 0; k < WFSIZE - j; ++k) { sum0[k] += sum0[k + j]; sum1[k] += sum1[k + j]; } } if(beta != static_cast(0)) { y[row * bsr_dim + 0] = std::fma(static_cast(beta), static_cast(y[row * bsr_dim + 0]), static_cast(alpha * sum0[0])); y[row * bsr_dim + 1] = std::fma(static_cast(beta), static_cast(y[row * bsr_dim + 1]), static_cast(alpha * sum1[0])); } else { y[row * bsr_dim + 0] = alpha * sum0[0]; y[row * bsr_dim + 1] = alpha * sum1[0]; } } else { for(J bi = 0; bi < bsr_dim; ++bi) { std::vector sum(WFSIZE, static_cast(0)); for(I j = row_begin; j < row_end; ++j) { J col = bsr_col_ind[j] - base; for(J bj = 0; bj < bsr_dim; bj += WFSIZE) { for(unsigned int k = 0; k < WFSIZE; ++k) { if(bj + k < bsr_dim) { if(dir == rocsparse_direction_column) { sum[k] = std::fma( static_cast(bsr_val[bsr_dim * bsr_dim * j + bsr_dim * (bj + k) + bi]), static_cast(x[bsr_dim * col + (bj + k)]), static_cast(sum[k])); } else { sum[k] = std::fma( static_cast(bsr_val[bsr_dim * bsr_dim * j + bsr_dim * bi + (bj + k)]), static_cast(x[bsr_dim * col + (bj + k)]), static_cast(sum[k])); } } } } } for(unsigned int j = 1; j < WFSIZE; j <<= 1) { for(unsigned int k = 0; k < WFSIZE - j; ++k) { sum[k] += sum[k + j]; } } if(beta != static_cast(0)) { y[row * bsr_dim + bi] = std::fma(static_cast(beta), static_cast(y[row * bsr_dim + bi]), static_cast(alpha * sum[0])); } else { y[row * bsr_dim + bi] = alpha * sum[0]; } } } } } template void host_bsrxmv(rocsparse_direction dir, 
rocsparse_operation trans, rocsparse_int size_of_mask, rocsparse_int mb, rocsparse_int nb, rocsparse_int nnzb, T alpha, const rocsparse_int* bsr_mask_ptr, const rocsparse_int* bsr_row_ptr, const rocsparse_int* bsr_end_ptr, const rocsparse_int* bsr_col_ind, const T* bsr_val, rocsparse_int bsr_dim, const T* x, T beta, T* y, rocsparse_index_base base) { // Quick return if(alpha == static_cast(0)) { if(beta != static_cast(1)) { for(rocsparse_int i = 0; i < size_of_mask; ++i) { rocsparse_int shift = (bsr_mask_ptr[i] - base) * bsr_dim; for(rocsparse_int j = 0; j < bsr_dim; ++j) { y[shift + j] *= beta; } } } return; } rocsparse_int WFSIZE; if(bsr_dim == 2) { rocsparse_int blocks_per_row = nnzb / mb; if(blocks_per_row < 8) { WFSIZE = 4; } else if(blocks_per_row < 16) { WFSIZE = 8; } else if(blocks_per_row < 32) { WFSIZE = 16; } else if(blocks_per_row < 64) { WFSIZE = 32; } else { WFSIZE = 64; } } else if(bsr_dim <= 8) { WFSIZE = 8; } else if(bsr_dim <= 16) { WFSIZE = 16; } else { WFSIZE = 32; } #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int mask_idx = 0; mask_idx < size_of_mask; ++mask_idx) { rocsparse_int row = bsr_mask_ptr[mask_idx] - base; rocsparse_int row_begin = bsr_row_ptr[row] - base; rocsparse_int row_end = bsr_end_ptr[row] - base; if(bsr_dim == 2) { std::vector sum0(WFSIZE, static_cast(0)); std::vector sum1(WFSIZE, static_cast(0)); for(rocsparse_int j = row_begin; j < row_end; j += WFSIZE) { for(rocsparse_int k = 0; k < WFSIZE; ++k) { if(j + k < row_end) { rocsparse_int col = bsr_col_ind[j + k] - base; if(dir == rocsparse_direction_column) { sum0[k] = std::fma(bsr_val[bsr_dim * bsr_dim * (j + k) + 0], x[col * bsr_dim + 0], sum0[k]); sum1[k] = std::fma(bsr_val[bsr_dim * bsr_dim * (j + k) + 1], x[col * bsr_dim + 0], sum1[k]); sum0[k] = std::fma(bsr_val[bsr_dim * bsr_dim * (j + k) + 2], x[col * bsr_dim + 1], sum0[k]); sum1[k] = std::fma(bsr_val[bsr_dim * bsr_dim * (j + k) + 3], x[col * bsr_dim + 1], sum1[k]); } else { 
sum0[k] = std::fma(bsr_val[bsr_dim * bsr_dim * (j + k) + 0], x[col * bsr_dim + 0], sum0[k]); sum0[k] = std::fma(bsr_val[bsr_dim * bsr_dim * (j + k) + 1], x[col * bsr_dim + 1], sum0[k]); sum1[k] = std::fma(bsr_val[bsr_dim * bsr_dim * (j + k) + 2], x[col * bsr_dim + 0], sum1[k]); sum1[k] = std::fma(bsr_val[bsr_dim * bsr_dim * (j + k) + 3], x[col * bsr_dim + 1], sum1[k]); } } } } for(unsigned int j = 1; j < WFSIZE; j <<= 1) { for(unsigned int k = 0; k < WFSIZE - j; ++k) { sum0[k] += sum0[k + j]; sum1[k] += sum1[k + j]; } } if(beta != static_cast(0)) { y[row * bsr_dim + 0] = std::fma(beta, y[row * bsr_dim + 0], alpha * sum0[0]); y[row * bsr_dim + 1] = std::fma(beta, y[row * bsr_dim + 1], alpha * sum1[0]); } else { y[row * bsr_dim + 0] = alpha * sum0[0]; y[row * bsr_dim + 1] = alpha * sum1[0]; } } else { for(rocsparse_int bi = 0; bi < bsr_dim; ++bi) { std::vector sum(WFSIZE, static_cast(0)); for(rocsparse_int j = row_begin; j < row_end; ++j) { rocsparse_int col = bsr_col_ind[j] - base; for(rocsparse_int bj = 0; bj < bsr_dim; bj += WFSIZE) { for(unsigned int k = 0; k < WFSIZE; ++k) { if(bj + k < bsr_dim) { if(dir == rocsparse_direction_column) { sum[k] = std::fma( bsr_val[bsr_dim * bsr_dim * j + bsr_dim * (bj + k) + bi], x[bsr_dim * col + (bj + k)], sum[k]); } else { sum[k] = std::fma( bsr_val[bsr_dim * bsr_dim * j + bsr_dim * bi + (bj + k)], x[bsr_dim * col + (bj + k)], sum[k]); } } } } } for(unsigned int j = 1; j < WFSIZE; j <<= 1) { for(unsigned int k = 0; k < WFSIZE - j; ++k) { sum[k] += sum[k + j]; } } if(beta != static_cast(0)) { y[row * bsr_dim + bi] = std::fma(beta, y[row * bsr_dim + bi], alpha * sum[0]); } else { y[row * bsr_dim + bi] = alpha * sum[0]; } } } } } template void host_gebsrmv(rocsparse_direction dir, rocsparse_operation trans, rocsparse_int mb, rocsparse_int nb, rocsparse_int nnzb, T alpha, const rocsparse_int* bsr_row_ptr, const rocsparse_int* bsr_col_ind, const T* bsr_val, rocsparse_int row_block_dim, rocsparse_int col_block_dim, const T* x, T 
beta, T* y, rocsparse_index_base base) { // Quick return if(alpha == static_cast(0)) { if(beta != static_cast(1)) { for(rocsparse_int i = 0; i < mb * row_block_dim; ++i) { y[i] *= beta; } } return; } if(row_block_dim == col_block_dim) { host_bsrmv(dir, trans, mb, nb, nnzb, alpha, bsr_row_ptr, bsr_col_ind, bsr_val, row_block_dim, x, beta, y, base); return; } rocsparse_int WFSIZE; if(row_block_dim == 2 || row_block_dim == 3 || row_block_dim == 4) { rocsparse_int blocks_per_row = nnzb / mb; if(blocks_per_row < 8) { WFSIZE = 4; } else if(blocks_per_row < 16) { WFSIZE = 8; } else if(blocks_per_row < 32) { WFSIZE = 16; } else if(blocks_per_row < 64) { WFSIZE = 32; } else { WFSIZE = 64; } } else if(row_block_dim <= 8) { WFSIZE = 8; } else if(row_block_dim <= 16) { WFSIZE = 16; } else { WFSIZE = 32; } #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int row = 0; row < mb; ++row) { rocsparse_int row_begin = bsr_row_ptr[row] - base; rocsparse_int row_end = bsr_row_ptr[row + 1] - base; if(row_block_dim == 2) { std::vector sum0(WFSIZE, static_cast(0)); std::vector sum1(WFSIZE, static_cast(0)); for(rocsparse_int j = row_begin; j < row_end; j += WFSIZE) { for(rocsparse_int k = 0; k < WFSIZE; ++k) { if(j + k < row_end) { rocsparse_int col = bsr_col_ind[j + k] - base; for(rocsparse_int l = 0; l < col_block_dim; l++) { if(dir == rocsparse_direction_column) { sum0[k] = std::fma(bsr_val[row_block_dim * col_block_dim * (j + k) + row_block_dim * l], x[col * col_block_dim + l], sum0[k]); sum1[k] = std::fma(bsr_val[row_block_dim * col_block_dim * (j + k) + row_block_dim * l + 1], x[col * col_block_dim + l], sum1[k]); } else { sum0[k] = std::fma(bsr_val[row_block_dim * col_block_dim * (j + k) + l], x[col * col_block_dim + l], sum0[k]); sum1[k] = std::fma(bsr_val[row_block_dim * col_block_dim * (j + k) + col_block_dim + l], x[col * col_block_dim + l], sum1[k]); } } } } } for(unsigned int j = 1; j < WFSIZE; j <<= 1) { for(unsigned int k = 0; k < WFSIZE - 
j; ++k) { sum0[k] += sum0[k + j]; sum1[k] += sum1[k + j]; } } if(beta != static_cast(0)) { y[row * row_block_dim + 0] = std::fma(beta, y[row * row_block_dim + 0], alpha * sum0[0]); y[row * row_block_dim + 1] = std::fma(beta, y[row * row_block_dim + 1], alpha * sum1[0]); } else { y[row * row_block_dim + 0] = alpha * sum0[0]; y[row * row_block_dim + 1] = alpha * sum1[0]; } } else if(row_block_dim == 3) { std::vector sum0(WFSIZE, static_cast(0)); std::vector sum1(WFSIZE, static_cast(0)); std::vector sum2(WFSIZE, static_cast(0)); for(rocsparse_int j = row_begin; j < row_end; j += WFSIZE) { for(rocsparse_int k = 0; k < WFSIZE; ++k) { if(j + k < row_end) { rocsparse_int col = bsr_col_ind[j + k] - base; for(rocsparse_int l = 0; l < col_block_dim; l++) { if(dir == rocsparse_direction_column) { sum0[k] = std::fma(bsr_val[row_block_dim * col_block_dim * (j + k) + row_block_dim * l], x[col * col_block_dim + l], sum0[k]); sum1[k] = std::fma(bsr_val[row_block_dim * col_block_dim * (j + k) + row_block_dim * l + 1], x[col * col_block_dim + l], sum1[k]); sum2[k] = std::fma(bsr_val[row_block_dim * col_block_dim * (j + k) + row_block_dim * l + 2], x[col * col_block_dim + l], sum2[k]); } else { sum0[k] = std::fma(bsr_val[row_block_dim * col_block_dim * (j + k) + l], x[col * col_block_dim + l], sum0[k]); sum1[k] = std::fma(bsr_val[row_block_dim * col_block_dim * (j + k) + col_block_dim + l], x[col * col_block_dim + l], sum1[k]); sum2[k] = std::fma(bsr_val[row_block_dim * col_block_dim * (j + k) + 2 * col_block_dim + l], x[col * col_block_dim + l], sum2[k]); } } } } } for(unsigned int j = 1; j < WFSIZE; j <<= 1) { for(unsigned int k = 0; k < WFSIZE - j; ++k) { sum0[k] += sum0[k + j]; sum1[k] += sum1[k + j]; sum2[k] += sum2[k + j]; } } if(beta != static_cast(0)) { y[row * row_block_dim + 0] = std::fma(beta, y[row * row_block_dim + 0], alpha * sum0[0]); y[row * row_block_dim + 1] = std::fma(beta, y[row * row_block_dim + 1], alpha * sum1[0]); y[row * row_block_dim + 2] = std::fma(beta, 
y[row * row_block_dim + 2], alpha * sum2[0]); } else { y[row * row_block_dim + 0] = alpha * sum0[0]; y[row * row_block_dim + 1] = alpha * sum1[0]; y[row * row_block_dim + 2] = alpha * sum2[0]; } } else if(row_block_dim == 4) { std::vector sum0(WFSIZE, static_cast(0)); std::vector sum1(WFSIZE, static_cast(0)); std::vector sum2(WFSIZE, static_cast(0)); std::vector sum3(WFSIZE, static_cast(0)); for(rocsparse_int j = row_begin; j < row_end; j += WFSIZE) { for(rocsparse_int k = 0; k < WFSIZE; ++k) { if(j + k < row_end) { rocsparse_int col = bsr_col_ind[j + k] - base; for(rocsparse_int l = 0; l < col_block_dim; l++) { if(dir == rocsparse_direction_column) { sum0[k] = std::fma(bsr_val[row_block_dim * col_block_dim * (j + k) + row_block_dim * l], x[col * col_block_dim + l], sum0[k]); sum1[k] = std::fma(bsr_val[row_block_dim * col_block_dim * (j + k) + row_block_dim * l + 1], x[col * col_block_dim + l], sum1[k]); sum2[k] = std::fma(bsr_val[row_block_dim * col_block_dim * (j + k) + row_block_dim * l + 2], x[col * col_block_dim + l], sum2[k]); sum3[k] = std::fma(bsr_val[row_block_dim * col_block_dim * (j + k) + row_block_dim * l + 3], x[col * col_block_dim + l], sum3[k]); } else { sum0[k] = std::fma(bsr_val[row_block_dim * col_block_dim * (j + k) + l], x[col * col_block_dim + l], sum0[k]); sum1[k] = std::fma(bsr_val[row_block_dim * col_block_dim * (j + k) + col_block_dim + l], x[col * col_block_dim + l], sum1[k]); sum2[k] = std::fma(bsr_val[row_block_dim * col_block_dim * (j + k) + 2 * col_block_dim + l], x[col * col_block_dim + l], sum2[k]); sum3[k] = std::fma(bsr_val[row_block_dim * col_block_dim * (j + k) + 3 * col_block_dim + l], x[col * col_block_dim + l], sum3[k]); } } } } } for(unsigned int j = 1; j < WFSIZE; j <<= 1) { for(unsigned int k = 0; k < WFSIZE - j; ++k) { sum0[k] += sum0[k + j]; sum1[k] += sum1[k + j]; sum2[k] += sum2[k + j]; sum3[k] += sum3[k + j]; } } if(beta != static_cast(0)) { y[row * row_block_dim + 0] = std::fma(beta, y[row * row_block_dim + 0], alpha 
* sum0[0]); y[row * row_block_dim + 1] = std::fma(beta, y[row * row_block_dim + 1], alpha * sum1[0]); y[row * row_block_dim + 2] = std::fma(beta, y[row * row_block_dim + 2], alpha * sum2[0]); y[row * row_block_dim + 3] = std::fma(beta, y[row * row_block_dim + 3], alpha * sum3[0]); } else { y[row * row_block_dim + 0] = alpha * sum0[0]; y[row * row_block_dim + 1] = alpha * sum1[0]; y[row * row_block_dim + 2] = alpha * sum2[0]; y[row * row_block_dim + 3] = alpha * sum3[0]; } } else { for(rocsparse_int bi = 0; bi < row_block_dim; ++bi) { std::vector sum(WFSIZE, static_cast(0)); for(rocsparse_int j = row_begin; j < row_end; ++j) { rocsparse_int col = bsr_col_ind[j] - base; for(rocsparse_int bj = 0; bj < col_block_dim; bj += WFSIZE) { for(unsigned int k = 0; k < WFSIZE; ++k) { if(bj + k < col_block_dim) { if(dir == rocsparse_direction_column) { sum[k] = std::fma(bsr_val[row_block_dim * col_block_dim * j + row_block_dim * (bj + k) + bi], x[col_block_dim * col + (bj + k)], sum[k]); } else { sum[k] = std::fma(bsr_val[row_block_dim * col_block_dim * j + col_block_dim * bi + (bj + k)], x[col_block_dim * col + (bj + k)], sum[k]); } } } } } for(unsigned int j = 1; j < WFSIZE; j <<= 1) { for(unsigned int k = 0; k < WFSIZE - j; ++k) { sum[k] += sum[k + j]; } } if(beta != static_cast(0)) { y[row * row_block_dim + bi] = std::fma(beta, y[row * row_block_dim + bi], alpha * sum[0]); } else { y[row * row_block_dim + bi] = alpha * sum[0]; } } } } } template static inline void host_bsr_lsolve(rocsparse_direction dir, rocsparse_operation trans_X, rocsparse_int mb, rocsparse_int nrhs, T alpha, const rocsparse_int* bsr_row_ptr, const rocsparse_int* bsr_col_ind, const T* bsr_val, rocsparse_int bsr_dim, const T* B, rocsparse_int ldb, T* X, rocsparse_int ldx, rocsparse_diag_type diag_type, rocsparse_index_base base, rocsparse_int* struct_pivot, rocsparse_int* numeric_pivot) { #ifdef _OPENMP #pragma omp parallel for #endif for(rocsparse_int i = 0; i < nrhs; ++i) { // Process lower triangular 
part for(rocsparse_int bsr_row = 0; bsr_row < mb; ++bsr_row) { rocsparse_int bsr_row_begin = bsr_row_ptr[bsr_row] - base; rocsparse_int bsr_row_end = bsr_row_ptr[bsr_row + 1] - base; // Loop over blocks rows for(rocsparse_int bi = 0; bi < bsr_dim; ++bi) { rocsparse_int diag = -1; rocsparse_int local_row = bsr_row * bsr_dim + bi; rocsparse_int idx_B = (trans_X == rocsparse_operation_none) ? i * ldb + local_row : local_row * ldb + i; rocsparse_int idx_X = (trans_X == rocsparse_operation_none) ? i * ldx + local_row : local_row * ldx + i; T sum = alpha * B[idx_B]; T diag_val = static_cast(0); // Loop over BSR columns for(rocsparse_int j = bsr_row_begin; j < bsr_row_end; ++j) { rocsparse_int bsr_col = bsr_col_ind[j] - base; // Loop over blocks columns for(rocsparse_int bj = 0; bj < bsr_dim; ++bj) { rocsparse_int local_col = bsr_col * bsr_dim + bj; T local_val = (dir == rocsparse_direction_row) ? bsr_val[bsr_dim * bsr_dim * j + bi * bsr_dim + bj] : bsr_val[bsr_dim * bsr_dim * j + bi + bj * bsr_dim]; if(local_val == static_cast(0) && local_col == local_row && diag_type == rocsparse_diag_type_non_unit) { // Numerical zero pivot found, avoid division by 0 // and store index for later use. *numeric_pivot = std::min(*numeric_pivot, bsr_row + base); local_val = static_cast(1); } // Ignore all entries that are above the diagonal if(local_col > local_row) { break; } // Diagonal if(local_col == local_row) { // If diagonal type is non unit, do division by diagonal entry // This is not required for unit diagonal for obvious reasons if(diag_type == rocsparse_diag_type_non_unit) { diag = j; diag_val = static_cast(1) / local_val; } break; } // Lower triangular part rocsparse_int idx = (trans_X == rocsparse_operation_none) ? 
i * ldx + local_col : local_col * ldx + i; sum = std::fma(-local_val, X[idx], sum); } } if(diag_type == rocsparse_diag_type_non_unit) { if(diag == -1) { *struct_pivot = std::min(*struct_pivot, bsr_row + base); } X[idx_X] = sum * diag_val; } else { X[idx_X] = sum; } } } } } template static inline void host_bsr_usolve(rocsparse_direction dir, rocsparse_operation trans_X, rocsparse_int mb, rocsparse_int nrhs, T alpha, const rocsparse_int* bsr_row_ptr, const rocsparse_int* bsr_col_ind, const T* bsr_val, rocsparse_int bsr_dim, const T* B, rocsparse_int ldb, T* X, rocsparse_int ldx, rocsparse_diag_type diag_type, rocsparse_index_base base, rocsparse_int* struct_pivot, rocsparse_int* numeric_pivot) { #ifdef _OPENMP #pragma omp parallel for #endif for(int i = 0; i < nrhs; ++i) { // Process upper triangular part for(rocsparse_int bsr_row = mb - 1; bsr_row >= 0; --bsr_row) { rocsparse_int bsr_row_begin = bsr_row_ptr[bsr_row] - base; rocsparse_int bsr_row_end = bsr_row_ptr[bsr_row + 1] - base; for(rocsparse_int bi = bsr_dim - 1; bi >= 0; --bi) { rocsparse_int local_row = bsr_row * bsr_dim + bi; rocsparse_int idx_B = (trans_X == rocsparse_operation_none) ? i * ldb + local_row : local_row * ldb + i; rocsparse_int idx_X = (trans_X == rocsparse_operation_none) ? i * ldx + local_row : local_row * ldx + i; T sum = alpha * B[idx_B]; rocsparse_int diag = -1; T diag_val = static_cast(0); for(rocsparse_int j = bsr_row_end - 1; j >= bsr_row_begin; --j) { rocsparse_int bsr_col = bsr_col_ind[j] - base; for(rocsparse_int bj = bsr_dim - 1; bj >= 0; --bj) { rocsparse_int local_col = bsr_col * bsr_dim + bj; T local_val = dir == rocsparse_direction_row ? 
bsr_val[bsr_dim * bsr_dim * j + bi * bsr_dim + bj] : bsr_val[bsr_dim * bsr_dim * j + bi + bj * bsr_dim]; // Ignore all entries that are below the diagonal if(local_col < local_row) { continue; } // Diagonal if(local_col == local_row) { if(diag_type == rocsparse_diag_type_non_unit) { // Check for numerical zero if(local_val == static_cast(0)) { *numeric_pivot = std::min(*numeric_pivot, bsr_row + base); local_val = static_cast(1); } diag = j; diag_val = static_cast(1) / local_val; } continue; } // Upper triangular part rocsparse_int idx = (trans_X == rocsparse_operation_none) ? i * ldx + local_col : local_col * ldx + i; sum = std::fma(-local_val, X[idx], sum); } } if(diag_type == rocsparse_diag_type_non_unit) { if(diag == -1) { *struct_pivot = std::min(*struct_pivot, bsr_row + base); } X[idx_X] = sum * diag_val; } else { X[idx_X] = sum; } } } } } template void host_bsrsv(rocsparse_operation trans, rocsparse_direction dir, rocsparse_int mb, rocsparse_int nnzb, T alpha, const rocsparse_int* bsr_row_ptr, const rocsparse_int* bsr_col_ind, const T* bsr_val, rocsparse_int bsr_dim, const T* x, T* y, rocsparse_diag_type diag_type, rocsparse_fill_mode fill_mode, rocsparse_index_base base, rocsparse_int* struct_pivot, rocsparse_int* numeric_pivot) { // Initialize pivot *struct_pivot = mb + 1; *numeric_pivot = mb + 1; if(trans == rocsparse_operation_none) { if(fill_mode == rocsparse_fill_mode_lower) { host_bsr_lsolve(dir, rocsparse_operation_none, mb, 1, alpha, bsr_row_ptr, bsr_col_ind, bsr_val, bsr_dim, x, mb * bsr_dim, y, mb * bsr_dim, diag_type, base, struct_pivot, numeric_pivot); } else { host_bsr_usolve(dir, rocsparse_operation_none, mb, 1, alpha, bsr_row_ptr, bsr_col_ind, bsr_val, bsr_dim, x, mb * bsr_dim, y, mb * bsr_dim, diag_type, base, struct_pivot, numeric_pivot); } } else if(trans == rocsparse_operation_transpose) { // Transpose matrix std::vector bsrt_row_ptr; std::vector bsrt_col_ind; std::vector bsrt_val; host_bsr_to_bsc(mb, mb, nnzb, bsr_dim, bsr_row_ptr, 
bsr_col_ind, bsr_val, bsrt_col_ind, bsrt_row_ptr, bsrt_val, base, base); if(fill_mode == rocsparse_fill_mode_lower) { host_bsr_usolve(dir, rocsparse_operation_none, mb, 1, alpha, bsrt_row_ptr.data(), bsrt_col_ind.data(), bsrt_val.data(), bsr_dim, x, mb * bsr_dim, y, mb * bsr_dim, diag_type, base, struct_pivot, numeric_pivot); } else { host_bsr_lsolve(dir, rocsparse_operation_none, mb, 1, alpha, bsrt_row_ptr.data(), bsrt_col_ind.data(), bsrt_val.data(), bsr_dim, x, mb * bsr_dim, y, mb * bsr_dim, diag_type, base, struct_pivot, numeric_pivot); } } *numeric_pivot = std::min(*numeric_pivot, *struct_pivot); *struct_pivot = (*struct_pivot == mb + 1) ? -1 : *struct_pivot; *numeric_pivot = (*numeric_pivot == mb + 1) ? -1 : *numeric_pivot; } template void host_coomv(rocsparse_operation trans, I M, I N, int64_t nnz, T alpha, const I* coo_row_ind, const I* coo_col_ind, const A* coo_val, const X* x, T beta, Y* y, rocsparse_index_base base) { if(trans == rocsparse_operation_none) { #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(I i = 0; i < M; ++i) { y[i] *= beta; } for(int64_t i = 0; i < nnz; ++i) { y[coo_row_ind[i] - base] = std::fma(alpha * coo_val[i], x[coo_col_ind[i] - base], y[coo_row_ind[i] - base]); } } else { #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(I i = 0; i < N; ++i) { y[i] *= beta; } for(int64_t i = 0; i < nnz; ++i) { I row = coo_row_ind[i] - base; I col = coo_col_ind[i] - base; T val = (trans == rocsparse_operation_transpose) ? 
coo_val[i] : rocsparse_conj(coo_val[i]); y[col] = std::fma(alpha * val, x[row], y[col]); } } } template void host_coomv_aos(rocsparse_operation trans, I M, I N, int64_t nnz, T alpha, const I* coo_ind, const A* coo_val, const X* x, T beta, Y* y, rocsparse_index_base base) { switch(trans) { case rocsparse_operation_none: { #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(I i = 0; i < M; ++i) { y[i] *= beta; } for(int64_t i = 0; i < nnz; ++i) { y[coo_ind[2 * i] - base] = std::fma( alpha * coo_val[i], x[coo_ind[2 * i + 1] - base], y[coo_ind[2 * i] - base]); } break; } case rocsparse_operation_transpose: case rocsparse_operation_conjugate_transpose: { #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(I i = 0; i < N; ++i) { y[i] *= beta; } for(int64_t i = 0; i < nnz; ++i) { I row = coo_ind[2 * i] - base; I col = coo_ind[2 * i + 1] - base; T val = (trans == rocsparse_operation_transpose) ? coo_val[i] : rocsparse_conj(coo_val[i]); y[col] = std::fma(alpha * val, x[row], y[col]); } break; } } } template inline A conj_val(A val, bool conj) { return conj ? rocsparse_conj(val) : val; } template static void host_csrmv_general(rocsparse_operation trans, J M, J N, I nnz, T alpha, const I* csr_row_ptr, const J* csr_col_ind, const A* csr_val, const X* x, T beta, Y* y, rocsparse_index_base base, rocsparse_spmv_alg algo, bool force_conj) { bool conj = (trans == rocsparse_operation_conjugate_transpose || force_conj); if(trans == rocsparse_operation_none) { if(algo == rocsparse_spmv_alg_csr_stream) { // Get device properties int dev; hipDeviceProp_t prop; hipGetDevice(&dev); hipGetDeviceProperties(&prop, dev); int WF_SIZE; J nnz_per_row = (M == 0) ? 
0 : (nnz / M); if(nnz_per_row < 4) WF_SIZE = 2; else if(nnz_per_row < 8) WF_SIZE = 4; else if(nnz_per_row < 16) WF_SIZE = 8; else if(nnz_per_row < 32) WF_SIZE = 16; else if(nnz_per_row < 64 || prop.warpSize == 32) WF_SIZE = 32; else WF_SIZE = 64; #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(J i = 0; i < M; ++i) { I row_begin = csr_row_ptr[i] - base; I row_end = csr_row_ptr[i + 1] - base; std::vector sum(WF_SIZE, static_cast(0)); for(I j = row_begin; j < row_end; j += WF_SIZE) { for(int k = 0; k < WF_SIZE; ++k) { if(j + k < row_end) { sum[k] = std::fma(alpha * conj_val(csr_val[j + k], conj), x[csr_col_ind[j + k] - base], sum[k]); } } } for(int j = 1; j < WF_SIZE; j <<= 1) { for(int k = 0; k < WF_SIZE - j; ++k) { sum[k] += sum[k + j]; } } if(beta == static_cast(0)) { y[i] = sum[0]; } else { y[i] = std::fma(beta, y[i], sum[0]); } } } else { #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(J i = 0; i < M; ++i) { T sum = static_cast(0); T err = static_cast(0); I row_begin = csr_row_ptr[i] - base; I row_end = csr_row_ptr[i + 1] - base; for(I j = row_begin; j < row_end; ++j) { T old = sum; T prod = alpha * conj_val(csr_val[j], conj) * x[csr_col_ind[j] - base]; sum = sum + prod; err = (old - (sum - (sum - old))) + (prod - (sum - old)) + err; } if(beta != static_cast(0)) { y[i] = std::fma(beta, y[i], sum + err); } else { y[i] = sum + err; } } } } else { // Scale y with beta for(J i = 0; i < N; ++i) { y[i] *= beta; } // Transposed SpMV for(J i = 0; i < M; ++i) { I row_begin = csr_row_ptr[i] - base; I row_end = csr_row_ptr[i + 1] - base; T row_val = alpha * x[i]; for(I j = row_begin; j < row_end; ++j) { J col = csr_col_ind[j] - base; A val = conj_val(csr_val[j], conj); y[col] = std::fma( static_cast(val), static_cast(row_val), static_cast(y[col])); } } } } template static void host_csrmv_symmetric(rocsparse_operation trans, J M, J N, I nnz, T alpha, const I* csr_row_ptr, const J* csr_col_ind, const A* csr_val, const X* 
x, T beta, Y* y, rocsparse_index_base base, rocsparse_spmv_alg algo, bool force_conj) { bool conj = (trans == rocsparse_operation_conjugate_transpose || force_conj); if(algo == rocsparse_spmv_alg_csr_stream || trans != rocsparse_operation_none) { // Get device properties int dev; hipDeviceProp_t prop; hipGetDevice(&dev); hipGetDeviceProperties(&prop, dev); int WF_SIZE; J nnz_per_row = (M == 0) ? 0 : (nnz / M); if(nnz_per_row < 4) WF_SIZE = 2; else if(nnz_per_row < 8) WF_SIZE = 4; else if(nnz_per_row < 16) WF_SIZE = 8; else if(nnz_per_row < 32) WF_SIZE = 16; else if(nnz_per_row < 64 || prop.warpSize == 32) WF_SIZE = 32; else WF_SIZE = 64; #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(J i = 0; i < M; ++i) { I row_begin = csr_row_ptr[i] - base; I row_end = csr_row_ptr[i + 1] - base; std::vector sum(WF_SIZE, static_cast(0)); for(I j = row_begin; j < row_end; j += WF_SIZE) { for(int k = 0; k < WF_SIZE; ++k) { if(j + k < row_end) { A val = conj_val(csr_val[j + k], conj); sum[k] = std::fma(alpha * val, x[csr_col_ind[j + k] - base], sum[k]); } } } for(int j = 1; j < WF_SIZE; j <<= 1) { for(int k = 0; k < WF_SIZE - j; ++k) { sum[k] += sum[k + j]; } } if(beta == static_cast(0)) { y[i] = sum[0]; } else { y[i] = std::fma(beta, y[i], sum[0]); } } for(J i = 0; i < M; i++) { I row_begin = csr_row_ptr[i] - base; I row_end = csr_row_ptr[i + 1] - base; T x_val = alpha * x[i]; for(I j = row_begin; j < row_end; ++j) { if((csr_col_ind[j] - base) != i) { y[csr_col_ind[j] - base] = std::fma(static_cast(conj_val(csr_val[j], conj)), static_cast(x_val), static_cast(y[csr_col_ind[j] - base])); } } } } else { // Scale y with beta #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(J i = 0; i < M; ++i) { y[i] *= beta; } #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(J i = 0; i < M; i++) { T sum = static_cast(0); T err = static_cast(0); I row_begin = csr_row_ptr[i] - base; I row_end = csr_row_ptr[i + 1] - base; 
for(I j = row_begin; j < row_end; ++j) { T old = sum; T prod = alpha * conj_val(csr_val[j], conj) * x[csr_col_ind[j] - base]; sum = sum + prod; err = (old - (sum - (sum - old))) + (prod - (sum - old)) + err; } y[i] += sum + err; } for(J i = 0; i < M; i++) { I row_begin = csr_row_ptr[i] - base; I row_end = csr_row_ptr[i + 1] - base; T x_val = alpha * x[i]; for(I j = row_begin; j < row_end; ++j) { if((csr_col_ind[j] - base) != i) { y[csr_col_ind[j] - base] = std::fma(static_cast(conj_val(csr_val[j], conj)), static_cast(x_val), static_cast(y[csr_col_ind[j] - base])); } } } } } template void host_csrmv(rocsparse_operation trans, J M, J N, I nnz, T alpha, const I* csr_row_ptr, const J* csr_col_ind, const A* csr_val, const X* x, T beta, Y* y, rocsparse_index_base base, rocsparse_matrix_type matrix_type, rocsparse_spmv_alg algo, bool force_conj) { switch(matrix_type) { case rocsparse_matrix_type_symmetric: { host_csrmv_symmetric(trans, M, N, nnz, alpha, csr_row_ptr, csr_col_ind, csr_val, x, beta, y, base, algo, force_conj); break; } default: { host_csrmv_general(trans, M, N, nnz, alpha, csr_row_ptr, csr_col_ind, csr_val, x, beta, y, base, algo, force_conj); break; } } } template void host_cscmv(rocsparse_operation trans, J M, J N, I nnz, T alpha, const I* __restrict csc_col_ptr, const J* __restrict csc_row_ind, const A* __restrict csc_val, const X* __restrict x, T beta, Y* __restrict y, rocsparse_index_base base, rocsparse_matrix_type matrix_type, rocsparse_spmv_alg algo) { switch(trans) { case rocsparse_operation_none: { return host_csrmv(rocsparse_operation_transpose, N, M, nnz, alpha, csc_col_ptr, csc_row_ind, csc_val, x, beta, y, base, matrix_type, algo, false); } case rocsparse_operation_transpose: { return host_csrmv(rocsparse_operation_none, N, M, nnz, alpha, csc_col_ptr, csc_row_ind, csc_val, x, beta, y, base, matrix_type, algo, false); } case rocsparse_operation_conjugate_transpose: { return host_csrmv(rocsparse_operation_none, N, M, nnz, alpha, csc_col_ptr, 
csc_row_ind, csc_val, x, beta, y, base, matrix_type, algo, true); } } } template static void host_csr_lsolve(J M, T alpha, const I* csr_row_ptr, const J* csr_col_ind, const T* csr_val, const T* x, T* y, rocsparse_diag_type diag_type, rocsparse_index_base base, J* struct_pivot, J* numeric_pivot) { // Get device properties int dev; hipDeviceProp_t prop; hipGetDevice(&dev); hipGetDeviceProperties(&prop, dev); std::vector temp(prop.warpSize); // Process lower triangular part for(J row = 0; row < M; ++row) { temp.assign(prop.warpSize, static_cast(0)); temp[0] = alpha * x[row]; I diag = -1; I row_begin = csr_row_ptr[row] - base; I row_end = csr_row_ptr[row + 1] - base; T diag_val = static_cast(0); for(I l = row_begin; l < row_end; l += prop.warpSize) { for(unsigned int k = 0; k < prop.warpSize; ++k) { I j = l + k; // Do not run out of bounds if(j >= row_end) { break; } J local_col = csr_col_ind[j] - base; T local_val = csr_val[j]; if(local_val == static_cast(0) && local_col == row && diag_type == rocsparse_diag_type_non_unit) { // Numerical zero pivot found, avoid division by 0 // and store index for later use. 
*numeric_pivot = std::min(*numeric_pivot, row + base); local_val = static_cast(1); } // Ignore all entries that are above the diagonal if(local_col > row) { break; } // Diagonal entry if(local_col == row) { // If diagonal type is non unit, do division by diagonal entry // This is not required for unit diagonal for obvious reasons if(diag_type == rocsparse_diag_type_non_unit) { diag = j; diag_val = static_cast(1) / local_val; } break; } // Lower triangular part temp[k] = std::fma(-local_val, y[local_col], temp[k]); } } for(unsigned int j = 1; j < prop.warpSize; j <<= 1) { for(unsigned int k = 0; k < prop.warpSize - j; ++k) { temp[k] += temp[k + j]; } } if(diag_type == rocsparse_diag_type_non_unit) { if(diag == -1) { *struct_pivot = std::min(*struct_pivot, row + base); } y[row] = temp[0] * diag_val; } else { y[row] = temp[0]; } } } template static void host_csr_usolve(J M, T alpha, const I* csr_row_ptr, const J* csr_col_ind, const T* csr_val, const T* x, T* y, rocsparse_diag_type diag_type, rocsparse_index_base base, J* struct_pivot, J* numeric_pivot) { // Get device properties int dev; hipDeviceProp_t prop; hipGetDevice(&dev); hipGetDeviceProperties(&prop, dev); std::vector temp(prop.warpSize); // Process upper triangular part for(J row = M - 1; row >= 0; --row) { temp.assign(prop.warpSize, static_cast(0)); temp[0] = alpha * x[row]; I diag = -1; I row_begin = csr_row_ptr[row] - base; I row_end = csr_row_ptr[row + 1] - base; T diag_val = static_cast(0); for(I l = row_end - 1; l >= row_begin; l -= prop.warpSize) { for(unsigned int k = 0; k < prop.warpSize; ++k) { I j = l - k; // Do not run out of bounds if(j < row_begin) { break; } J local_col = csr_col_ind[j] - base; T local_val = csr_val[j]; // Ignore all entries that are below the diagonal if(local_col < row) { continue; } // Diagonal entry if(local_col == row) { if(diag_type == rocsparse_diag_type_non_unit) { // Check for numerical zero if(local_val == static_cast(0)) { *numeric_pivot = std::min(*numeric_pivot, 
row + base); local_val = static_cast(1); } diag = j; diag_val = static_cast(1) / local_val; } continue; } // Upper triangular part temp[k] = std::fma(-local_val, y[local_col], temp[k]); } } for(unsigned int j = 1; j < prop.warpSize; j <<= 1) { for(unsigned int k = 0; k < prop.warpSize - j; ++k) { temp[k] += temp[k + j]; } } if(diag_type == rocsparse_diag_type_non_unit) { if(diag == -1) { *struct_pivot = std::min(*struct_pivot, row + base); } y[row] = temp[0] * diag_val; } else { y[row] = temp[0]; } } } template void host_csrsv(rocsparse_operation trans, J M, I nnz, T alpha, const I* csr_row_ptr, const J* csr_col_ind, const T* csr_val, const T* x, T* y, rocsparse_diag_type diag_type, rocsparse_fill_mode fill_mode, rocsparse_index_base base, J* struct_pivot, J* numeric_pivot) { // Initialize pivot *struct_pivot = M + 1; *numeric_pivot = M + 1; if(trans == rocsparse_operation_none) { if(fill_mode == rocsparse_fill_mode_lower) { host_csr_lsolve(M, alpha, csr_row_ptr, csr_col_ind, csr_val, x, y, diag_type, base, struct_pivot, numeric_pivot); } else { host_csr_usolve(M, alpha, csr_row_ptr, csr_col_ind, csr_val, x, y, diag_type, base, struct_pivot, numeric_pivot); } } else if(trans == rocsparse_operation_transpose || trans == rocsparse_operation_conjugate_transpose) { // Transpose matrix std::vector csrt_row_ptr(M + 1); std::vector csrt_col_ind(nnz); std::vector csrt_val(nnz); host_csr_to_csc(M, M, nnz, csr_row_ptr, csr_col_ind, csr_val, csrt_col_ind, csrt_row_ptr, csrt_val, rocsparse_action_numeric, base); if(trans == rocsparse_operation_conjugate_transpose) { for(size_t i = 0; i < csrt_val.size(); i++) { csrt_val[i] = rocsparse_conj(csrt_val[i]); } } if(fill_mode == rocsparse_fill_mode_lower) { host_csr_usolve(M, alpha, csrt_row_ptr.data(), csrt_col_ind.data(), csrt_val.data(), x, y, diag_type, base, struct_pivot, numeric_pivot); } else { host_csr_lsolve(M, alpha, csrt_row_ptr.data(), csrt_col_ind.data(), csrt_val.data(), x, y, diag_type, base, struct_pivot, 
numeric_pivot); } } *numeric_pivot = std::min(*numeric_pivot, *struct_pivot); *struct_pivot = (*struct_pivot == M + 1) ? -1 : *struct_pivot; *numeric_pivot = (*numeric_pivot == M + 1) ? -1 : *numeric_pivot; } template void host_coosv(rocsparse_operation trans, I M, int64_t nnz, T alpha, const std::vector& coo_row_ind, const std::vector& coo_col_ind, const std::vector& coo_val, const std::vector& x, std::vector& y, rocsparse_diag_type diag_type, rocsparse_fill_mode fill_mode, rocsparse_index_base base, I* struct_pivot, I* numeric_pivot) { if(std::is_same() && nnz < std::numeric_limits::max()) { std::vector csr_row_ptr(M + 1); host_coo_to_csr(M, nnz, coo_row_ind.data(), csr_row_ptr, base); host_csrsv(trans, M, nnz, alpha, csr_row_ptr.data(), coo_col_ind.data(), coo_val.data(), x.data(), y.data(), diag_type, fill_mode, base, struct_pivot, numeric_pivot); } else { std::vector csr_row_ptr(M + 1); host_coo_to_csr(M, nnz, coo_row_ind.data(), csr_row_ptr, base); host_csrsv(trans, M, nnz, alpha, csr_row_ptr.data(), coo_col_ind.data(), coo_val.data(), x.data(), y.data(), diag_type, fill_mode, base, struct_pivot, numeric_pivot); } } template void host_ellmv(rocsparse_operation trans, I M, I N, T alpha, const I* ell_col_ind, const A* ell_val, I ell_width, const X* x, T beta, Y* y, rocsparse_index_base base) { if(trans == rocsparse_operation_none) { #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(I i = 0; i < M; ++i) { T sum = static_cast(0); for(I p = 0; p < ell_width; ++p) { int64_t idx = (int64_t)p * M + i; I col = ell_col_ind[idx] - base; if(col >= 0 && col < N) { sum = std::fma( static_cast(ell_val[idx]), static_cast(x[col]), static_cast(sum)); } else { break; } } if(beta != static_cast(0)) { y[i] = std::fma( static_cast(beta), static_cast(y[i]), static_cast(alpha * sum)); } else { y[i] = alpha * sum; } } } else { // Scale y with beta for(I i = 0; i < N; ++i) { y[i] *= beta; } // Transposed SpMV for(I i = 0; i < M; ++i) { T row_val = alpha * 
x[i]; for(I p = 0; p < ell_width; ++p) { int64_t idx = (int64_t)p * M + i; I col = ell_col_ind[idx] - base; if(col >= 0 && col < N) { T val = (trans == rocsparse_operation_conjugate_transpose) ? rocsparse_conj(ell_val[idx]) : ell_val[idx]; y[col] = std::fma( static_cast(val), static_cast(row_val), static_cast(y[col])); } else { break; } } } } } template void host_hybmv(rocsparse_operation trans, rocsparse_int M, rocsparse_int N, T alpha, rocsparse_int ell_nnz, const rocsparse_int* ell_col_ind, const T* ell_val, rocsparse_int ell_width, rocsparse_int coo_nnz, const rocsparse_int* coo_row_ind, const rocsparse_int* coo_col_ind, const T* coo_val, const T* x, T beta, T* y, rocsparse_index_base base) { T coo_beta = beta; // ELL part if(ell_nnz > 0) { host_ellmv(trans, M, N, alpha, ell_col_ind, ell_val, ell_width, x, beta, y, base); coo_beta = static_cast(1); } // COO part if(coo_nnz > 0) { host_coomv( trans, M, N, coo_nnz, alpha, coo_row_ind, coo_col_ind, coo_val, x, coo_beta, y, base); } } /* * =========================================================================== * level 3 SPARSE * =========================================================================== */ template void host_bsrmm(rocsparse_handle handle, rocsparse_direction dir, rocsparse_operation transA, rocsparse_operation transB, rocsparse_int Mb, rocsparse_int N, rocsparse_int Kb, rocsparse_int nnzb, const T* alpha, const rocsparse_mat_descr descr, const T* bsr_val_A, const rocsparse_int* bsr_row_ptr_A, const rocsparse_int* bsr_col_ind_A, rocsparse_int block_dim, const T* B, rocsparse_int ldb, const T* beta, T* C, rocsparse_int ldc) { rocsparse_index_base base = rocsparse_get_mat_index_base(descr); if(transA != rocsparse_operation_none) { return; } if(transB != rocsparse_operation_none && transB != rocsparse_operation_transpose) { return; } rocsparse_int M = Mb * block_dim; #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int i = 0; i < M; i++) { rocsparse_int local_row 
= i % block_dim; rocsparse_int row_begin = bsr_row_ptr_A[i / block_dim] - base; rocsparse_int row_end = bsr_row_ptr_A[i / block_dim + 1] - base; for(rocsparse_int j = 0; j < N; j++) { rocsparse_int idx_C = i + j * ldc; T sum = static_cast(0); for(rocsparse_int s = row_begin; s < row_end; s++) { for(rocsparse_int t = 0; t < block_dim; t++) { rocsparse_int idx_A = (dir == rocsparse_direction_row) ? block_dim * block_dim * s + block_dim * local_row + t : block_dim * block_dim * s + block_dim * t + local_row; rocsparse_int idx_B = (transB == rocsparse_operation_none) ? j * ldb + block_dim * (bsr_col_ind_A[s] - base) + t : (block_dim * (bsr_col_ind_A[s] - base) + t) * ldb + j; sum = std::fma(bsr_val_A[idx_A], B[idx_B], sum); } } if(*beta == static_cast(0)) { C[idx_C] = *alpha * sum; } else { C[idx_C] = std::fma(*beta, C[idx_C], *alpha * sum); } } } } template void host_gebsrmm(rocsparse_handle handle, rocsparse_direction dir, rocsparse_operation transA, rocsparse_operation transB, rocsparse_int Mb, rocsparse_int N, rocsparse_int Kb, rocsparse_int nnzb, const T* alpha, const rocsparse_mat_descr descr, const T* bsr_val_A, const rocsparse_int* bsr_row_ptr_A, const rocsparse_int* bsr_col_ind_A, rocsparse_int row_block_dim, rocsparse_int col_block_dim, const T* B, rocsparse_int ldb, const T* beta, T* C, rocsparse_int ldc) { if(transA != rocsparse_operation_none) { return; } if(transB != rocsparse_operation_none && transB != rocsparse_operation_transpose) { return; } rocsparse_index_base base = rocsparse_get_mat_index_base(descr); rocsparse_int M = Mb * row_block_dim; const rocsparse_int rowXcol_block_dim = row_block_dim * col_block_dim; #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int row_idx = 0; row_idx < M; ++row_idx) { const rocsparse_int row_block_idx = row_idx / row_block_dim, row_local_idx = row_idx % row_block_dim; const rocsparse_int start = bsr_row_ptr_A[row_block_idx] - base, bound = bsr_row_ptr_A[row_block_idx + 1] - base; 
for(rocsparse_int col_idx = 0; col_idx < N; ++col_idx) { const rocsparse_int idx_C = ldc * col_idx + row_idx; T sum = static_cast(0); for(rocsparse_int at = start; at < bound; ++at) { for(rocsparse_int col_local_idx = 0; col_local_idx < col_block_dim; ++col_local_idx) { const rocsparse_int idx_A = (dir == rocsparse_direction_row) ? rowXcol_block_dim * at + col_block_dim * row_local_idx + col_local_idx : rowXcol_block_dim * at + row_block_dim * col_local_idx + row_local_idx; const rocsparse_int idx_B = (transB == rocsparse_operation_none) ? col_idx * ldb + col_block_dim * (bsr_col_ind_A[at] - base) + col_local_idx : (col_block_dim * (bsr_col_ind_A[at] - base) + col_local_idx) * ldb + col_idx; sum = std::fma(bsr_val_A[idx_A], B[idx_B], sum); } } if(*beta == static_cast(0)) { C[idx_C] = *alpha * sum; } else { C[idx_C] = std::fma(*beta, C[idx_C], *alpha * sum); } } } } template void host_csrmm(J M, J N, J K, rocsparse_operation transA, rocsparse_operation transB, T alpha, const I* csr_row_ptr_A, const J* csr_col_ind_A, const T* csr_val_A, const T* B, J ldb, T beta, T* C, J ldc, rocsparse_order order, rocsparse_index_base base, bool force_conj_A) { bool conj_A = (transA == rocsparse_operation_conjugate_transpose || force_conj_A); bool conj_B = (transB == rocsparse_operation_conjugate_transpose); if(transA == rocsparse_operation_none) { #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(J i = 0; i < M; i++) { I row_begin = csr_row_ptr_A[i] - base; I row_end = csr_row_ptr_A[i + 1] - base; for(J j = 0; j < N; ++j) { T sum = static_cast(0); for(I k = row_begin; k < row_end; ++k) { J idx_B = 0; if((transB == rocsparse_operation_none && order == rocsparse_order_column) || (transB == rocsparse_operation_transpose && order == rocsparse_order_row) || (transB == rocsparse_operation_conjugate_transpose && order == rocsparse_order_row)) { idx_B = (csr_col_ind_A[k] - base + j * ldb); } else { idx_B = (j + (csr_col_ind_A[k] - base) * ldb); } sum = 
std::fma(conj_val(csr_val_A[k], conj_A), conj_val(B[idx_B], conj_B), sum); } J idx_C = (order == rocsparse_order_column) ? i + j * ldc : i * ldc + j; if(beta == static_cast(0)) { C[idx_C] = alpha * sum; } else { C[idx_C] = std::fma(beta, C[idx_C], alpha * sum); } } } } else { // scale C by beta for(J i = 0; i < K; i++) { for(J j = 0; j < N; ++j) { J idx_C = (order == rocsparse_order_column) ? i + j * ldc : i * ldc + j; C[idx_C] = beta * C[idx_C]; } } for(J i = 0; i < M; i++) { I row_begin = csr_row_ptr_A[i] - base; I row_end = csr_row_ptr_A[i + 1] - base; for(J j = 0; j < N; ++j) { for(I k = row_begin; k < row_end; ++k) { J col = csr_col_ind_A[k] - base; T val = conj_val(csr_val_A[k], conj_A); J idx_B = 0; if((transB == rocsparse_operation_none && order == rocsparse_order_column) || (transB == rocsparse_operation_transpose && order == rocsparse_order_row) || (transB == rocsparse_operation_conjugate_transpose && order == rocsparse_order_row)) { idx_B = (i + j * ldb); } else { idx_B = (j + i * ldb); } J idx_C = (order == rocsparse_order_column) ? 
col + j * ldc : col * ldc + j; C[idx_C] += alpha * val * conj_val(B[idx_B], conj_B); } } } } } template void host_csrmm_batched(J M, J N, J K, J batch_count_A, I offsets_batch_stride_A, I columns_values_batch_stride_A, rocsparse_operation transA, rocsparse_operation transB, T alpha, const I* csr_row_ptr_A, const J* csr_col_ind_A, const T* csr_val_A, const T* B, J ldb, J batch_count_B, I batch_stride_B, T beta, T* C, J ldc, J batch_count_C, I batch_stride_C, rocsparse_order order, rocsparse_index_base base, bool force_conj_A) { const bool Ci_A_Bi = (batch_count_A == 1 && batch_count_B == batch_count_C); const bool Ci_Ai_B = (batch_count_B == 1 && batch_count_A == batch_count_C); const bool Ci_Ai_Bi = (batch_count_A == batch_count_C && batch_count_A == batch_count_B); if(!Ci_A_Bi && !Ci_Ai_B && !Ci_Ai_Bi) { return; } if(Ci_A_Bi) { for(J i = 0; i < batch_count_C; i++) { host_csrmm(M, N, K, transA, transB, alpha, csr_row_ptr_A, csr_col_ind_A, csr_val_A, B + batch_stride_B * i, ldb, beta, C + batch_stride_C * i, ldc, order, base, force_conj_A); } } else if(Ci_Ai_B) { for(J i = 0; i < batch_count_C; i++) { host_csrmm(M, N, K, transA, transB, alpha, csr_row_ptr_A + offsets_batch_stride_A * i, csr_col_ind_A + columns_values_batch_stride_A * i, csr_val_A + columns_values_batch_stride_A * i, B, ldb, beta, C + batch_stride_C * i, ldc, order, base, force_conj_A); } } else if(Ci_Ai_Bi) { for(J i = 0; i < batch_count_C; i++) { host_csrmm(M, N, K, transA, transB, alpha, csr_row_ptr_A + offsets_batch_stride_A * i, csr_col_ind_A + columns_values_batch_stride_A * i, csr_val_A + columns_values_batch_stride_A * i, B + batch_stride_B * i, ldb, beta, C + batch_stride_C * i, ldc, order, base, force_conj_A); } } } template void host_coomm(I M, I N, I K, int64_t nnz, rocsparse_operation transA, rocsparse_operation transB, T alpha, const I* coo_row_ind_A, const I* coo_col_ind_A, const T* coo_val_A, const T* B, I ldb, T beta, T* C, I ldc, rocsparse_order order, rocsparse_index_base base) { 
bool conj_A = (transA == rocsparse_operation_conjugate_transpose); bool conj_B = (transB == rocsparse_operation_conjugate_transpose); if(transA == rocsparse_operation_none) { for(I j = 0; j < N; j++) { #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(I i = 0; i < M; ++i) { int64_t idx_C = (order == rocsparse_order_column) ? i + j * (int64_t)ldc : i * (int64_t)ldc + j; C[idx_C] *= beta; } } #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(I j = 0; j < N; j++) { for(int64_t i = 0; i < nnz; ++i) { I row = coo_row_ind_A[i] - base; I col = coo_col_ind_A[i] - base; T val = alpha * coo_val_A[i]; int64_t idx_C = (order == rocsparse_order_column) ? row + j * (int64_t)ldc : row * (int64_t)ldc + j; int64_t idx_B = 0; if((transB == rocsparse_operation_none && order == rocsparse_order_column) || (transB != rocsparse_operation_none && order != rocsparse_order_column)) { idx_B = (col + j * (int64_t)ldb); } else { idx_B = (j + col * (int64_t)ldb); } C[idx_C] = std::fma(val, conj_val(B[idx_B], conj_B), C[idx_C]); } } } else { for(I j = 0; j < N; j++) { #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(I i = 0; i < K; ++i) { int64_t idx_C = (order == rocsparse_order_column) ? i + j * (int64_t)ldc : i * (int64_t)ldc + j; C[idx_C] *= beta; } } #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(I j = 0; j < N; j++) { for(int64_t i = 0; i < nnz; ++i) { I row = coo_row_ind_A[i] - base; I col = coo_col_ind_A[i] - base; T val = alpha * conj_val(coo_val_A[i], conj_A); int64_t idx_C = (order == rocsparse_order_column) ? 
col + j * (int64_t)ldc : col * (int64_t)ldc + j; int64_t idx_B = 0; if((transB == rocsparse_operation_none && order == rocsparse_order_column) || (transB != rocsparse_operation_none && order != rocsparse_order_column)) { idx_B = (row + j * (int64_t)ldb); } else { idx_B = (j + row * (int64_t)ldb); } C[idx_C] = std::fma(val, conj_val(B[idx_B], conj_B), C[idx_C]); } } } } template void host_coomm_batched(I M, I N, I K, int64_t nnz, I batch_count_A, int64_t batch_stride_A, rocsparse_operation transA, rocsparse_operation transB, T alpha, const I* coo_row_ind_A, const I* coo_col_ind_A, const T* coo_val_A, const T* B, I ldb, I batch_count_B, int64_t batch_stride_B, T beta, T* C, I ldc, I batch_count_C, int64_t batch_stride_C, rocsparse_order order, rocsparse_index_base base) { bool Ci_A_Bi = (batch_count_A == 1 && batch_count_B == batch_count_C); bool Ci_Ai_B = (batch_count_B == 1 && batch_count_A == batch_count_C); bool Ci_Ai_Bi = (batch_count_A == batch_count_C && batch_count_A == batch_count_B); if(!Ci_A_Bi && !Ci_Ai_B && !Ci_Ai_Bi) { return; } if(Ci_A_Bi) { for(I i = 0; i < batch_count_C; i++) { host_coomm(M, N, K, nnz, transA, transB, alpha, coo_row_ind_A, coo_col_ind_A, coo_val_A, B + batch_stride_B * i, ldb, beta, C + batch_stride_C * i, ldc, order, base); } } else if(Ci_Ai_B) { for(I i = 0; i < batch_count_C; i++) { host_coomm(M, N, K, nnz, transA, transB, alpha, coo_row_ind_A + batch_stride_A * i, coo_col_ind_A + batch_stride_A * i, coo_val_A + batch_stride_A * i, B, ldb, beta, C + batch_stride_C * i, ldc, order, base); } } else if(Ci_Ai_Bi) { for(I i = 0; i < batch_count_C; i++) { host_coomm(M, N, K, nnz, transA, transB, alpha, coo_row_ind_A + batch_stride_A * i, coo_col_ind_A + batch_stride_A * i, coo_val_A + batch_stride_A * i, B + batch_stride_B * i, ldb, beta, C + batch_stride_C * i, ldc, order, base); } } } template void host_cscmm(J M, J N, J K, rocsparse_operation transA, rocsparse_operation transB, T alpha, const I* __restrict csc_col_ptr_A, const J* 
__restrict csc_row_ind_A, const T* __restrict csc_val_A, const T* __restrict B, J ldb, T beta, T* __restrict C, J ldc, rocsparse_order order, rocsparse_index_base base) { switch(transA) { case rocsparse_operation_none: { return host_csrmm(K, N, M, rocsparse_operation_transpose, transB, alpha, csc_col_ptr_A, csc_row_ind_A, csc_val_A, B, ldb, beta, C, ldc, order, base, false); } case rocsparse_operation_transpose: { return host_csrmm(K, N, M, rocsparse_operation_none, transB, alpha, csc_col_ptr_A, csc_row_ind_A, csc_val_A, B, ldb, beta, C, ldc, order, base, false); } case rocsparse_operation_conjugate_transpose: { return host_csrmm(K, N, M, rocsparse_operation_none, transB, alpha, csc_col_ptr_A, csc_row_ind_A, csc_val_A, B, ldb, beta, C, ldc, order, base, true); } } } template void host_cscmm_batched(J M, J N, J K, J batch_count_A, I offsets_batch_stride_A, I rows_values_batch_stride_A, rocsparse_operation transA, rocsparse_operation transB, T alpha, const I* csc_col_ptr_A, const J* csc_row_ind_A, const T* csc_val_A, const T* B, J ldb, J batch_count_B, I batch_stride_B, T beta, T* C, J ldc, J batch_count_C, I batch_stride_C, rocsparse_order order, rocsparse_index_base base) { switch(transA) { case rocsparse_operation_none: { return host_csrmm_batched(K, N, M, batch_count_A, offsets_batch_stride_A, rows_values_batch_stride_A, rocsparse_operation_transpose, transB, alpha, csc_col_ptr_A, csc_row_ind_A, csc_val_A, B, ldb, batch_count_B, batch_stride_B, beta, C, ldc, batch_count_C, batch_stride_C, order, base, false); } case rocsparse_operation_transpose: { return host_csrmm_batched(K, N, M, batch_count_A, offsets_batch_stride_A, rows_values_batch_stride_A, rocsparse_operation_none, transB, alpha, csc_col_ptr_A, csc_row_ind_A, csc_val_A, B, ldb, batch_count_B, batch_stride_B, beta, C, ldc, batch_count_C, batch_stride_C, order, base, false); } case rocsparse_operation_conjugate_transpose: { return host_csrmm_batched(K, N, M, batch_count_A, offsets_batch_stride_A, 
rows_values_batch_stride_A, rocsparse_operation_none, transB, alpha, csc_col_ptr_A, csc_row_ind_A, csc_val_A, B, ldb, batch_count_B, batch_stride_B, beta, C, ldc, batch_count_C, batch_stride_C, order, base, true); } } } template static inline void host_lssolve(J M, J nrhs, rocsparse_operation transB, T alpha, const I* csr_row_ptr, const J* csr_col_ind, const T* csr_val, T* B, J ldb, rocsparse_diag_type diag_type, rocsparse_index_base base, J* struct_pivot, J* numeric_pivot) { #ifdef _OPENMP #pragma omp parallel for #endif for(J i = 0; i < nrhs; ++i) { // Process lower triangular part for(J row = 0; row < M; ++row) { J idx_B = (transB == rocsparse_operation_none) ? i * ldb + row : row * ldb + i; T sum = static_cast(0); if(transB == rocsparse_operation_conjugate_transpose) { sum = alpha * rocsparse_conj(B[idx_B]); } else { sum = alpha * B[idx_B]; } I diag = -1; I row_begin = csr_row_ptr[row] - base; I row_end = csr_row_ptr[row + 1] - base; T diag_val = static_cast(0); for(I j = row_begin; j < row_end; ++j) { J local_col = csr_col_ind[j] - base; T local_val = csr_val[j]; if(local_val == static_cast(0) && local_col == row && diag_type == rocsparse_diag_type_non_unit) { // Numerical zero pivot found, avoid division by 0 and store // index for later use *numeric_pivot = std::min(*numeric_pivot, row + base); local_val = static_cast(1); } // Ignore all entries that are above the diagonal if(local_col > row) { break; } // Diagonal entry if(local_col == row) { // If diagonal type is non unit, do division by diagonal entry // This is not required for unit diagonal for obvious reasons if(diag_type == rocsparse_diag_type_non_unit) { diag = j; diag_val = static_cast(1) / local_val; } break; } // Lower triangular part J idx = (transB == rocsparse_operation_none) ? 
i * ldb + local_col : local_col * ldb + i; if(transB == rocsparse_operation_conjugate_transpose) { sum = std::fma(-local_val, rocsparse_conj(B[idx]), sum); } else { sum = std::fma(-local_val, B[idx], sum); } } if(diag_type == rocsparse_diag_type_non_unit) { if(diag == -1) { *struct_pivot = std::min(*struct_pivot, row + base); } B[idx_B] = sum * diag_val; } else { B[idx_B] = sum; } } } } template static inline void host_ussolve(J M, J nrhs, rocsparse_operation transB, T alpha, const I* csr_row_ptr, const J* csr_col_ind, const T* csr_val, T* B, J ldb, rocsparse_diag_type diag_type, rocsparse_index_base base, J* struct_pivot, J* numeric_pivot) { #ifdef _OPENMP #pragma omp parallel for #endif for(J i = 0; i < nrhs; ++i) { // Process upper triangular part for(J row = M - 1; row >= 0; --row) { J idx_B = (transB == rocsparse_operation_none) ? i * ldb + row : row * ldb + i; T sum = static_cast(0); if(transB == rocsparse_operation_conjugate_transpose) { sum = alpha * rocsparse_conj(B[idx_B]); } else { sum = alpha * B[idx_B]; } I diag = -1; I row_begin = csr_row_ptr[row] - base; I row_end = csr_row_ptr[row + 1] - base; T diag_val = static_cast(0); for(I j = row_end - 1; j >= row_begin; --j) { J local_col = csr_col_ind[j] - base; T local_val = csr_val[j]; // Ignore all entries that are below the diagonal if(local_col < row) { continue; } // Diagonal entry if(local_col == row) { if(diag_type == rocsparse_diag_type_non_unit) { // Check for numerical zero if(local_val == static_cast(0)) { *numeric_pivot = std::min(*numeric_pivot, row + base); local_val = static_cast(1); } diag = j; diag_val = static_cast(1) / local_val; } continue; } // Upper triangular part J idx = (transB == rocsparse_operation_none) ? 
i * ldb + local_col : local_col * ldb + i; if(transB == rocsparse_operation_conjugate_transpose) { sum = std::fma(-local_val, rocsparse_conj(B[idx]), sum); } else { sum = std::fma(-local_val, B[idx], sum); } } if(diag_type == rocsparse_diag_type_non_unit) { if(diag == -1) { *struct_pivot = std::min(*struct_pivot, row + base); } B[idx_B] = sum * diag_val; } else { B[idx_B] = sum; } } } } template void host_csrsm(J M, J nrhs, I nnz, rocsparse_operation transA, rocsparse_operation transB, T alpha, const I* csr_row_ptr, const J* csr_col_ind, const T* csr_val, T* B, J ldb, rocsparse_diag_type diag_type, rocsparse_fill_mode fill_mode, rocsparse_index_base base, J* struct_pivot, J* numeric_pivot) { // Initialize pivot *struct_pivot = M + 1; *numeric_pivot = M + 1; if(transA == rocsparse_operation_none) { if(fill_mode == rocsparse_fill_mode_lower) { host_lssolve(M, nrhs, transB, alpha, csr_row_ptr, csr_col_ind, csr_val, B, ldb, diag_type, base, struct_pivot, numeric_pivot); } else { host_ussolve(M, nrhs, transB, alpha, csr_row_ptr, csr_col_ind, csr_val, B, ldb, diag_type, base, struct_pivot, numeric_pivot); } } else if(transA == rocsparse_operation_transpose || transA == rocsparse_operation_conjugate_transpose) { // Transpose matrix std::vector csrt_row_ptr(M + 1); std::vector csrt_col_ind(nnz); std::vector csrt_val(nnz); host_csr_to_csc(M, M, nnz, csr_row_ptr, csr_col_ind, csr_val, csrt_col_ind, csrt_row_ptr, csrt_val, rocsparse_action_numeric, base); if(transA == rocsparse_operation_conjugate_transpose) { for(size_t i = 0; i < csrt_val.size(); i++) { csrt_val[i] = rocsparse_conj(csrt_val[i]); } } if(fill_mode == rocsparse_fill_mode_lower) { host_ussolve(M, nrhs, transB, alpha, csrt_row_ptr.data(), csrt_col_ind.data(), csrt_val.data(), B, ldb, diag_type, base, struct_pivot, numeric_pivot); } else { host_lssolve(M, nrhs, transB, alpha, csrt_row_ptr.data(), csrt_col_ind.data(), csrt_val.data(), B, ldb, diag_type, base, struct_pivot, numeric_pivot); } } *numeric_pivot = 
// Host reference triangular solve for a matrix given in COO format.
//
// The COO row indices are turned into a CSR row pointer array of M + 1 entries with
// host_coo_to_csr, and the solve itself is delegated to host_csrsm. coo_col_ind and coo_val are
// passed through unchanged as the CSR column index and value arrays; all remaining arguments
// (operations, alpha, B, ldb, diagonal type, fill mode, index base and both pivot outputs) are
// forwarded as received.
//
// NOTE(review): the template arguments of this function (the list after `template`, and those of
// std::is_same, std::numeric_limits, std::vector and possibly of the called helpers) are not
// visible in this view of the file, which is why both branches read identically here.
// Presumably they differ in the integer type used for the row pointer array - confirm against
// the original source before editing.
template void host_coosm(I                    M,
                         I                    nrhs,
                         int64_t              nnz,
                         rocsparse_operation  transA,
                         rocsparse_operation  transB,
                         T                    alpha,
                         const I*             coo_row_ind,
                         const I*             coo_col_ind,
                         const T*             coo_val,
                         T*                   B,
                         I                    ldb,
                         rocsparse_diag_type  diag_type,
                         rocsparse_fill_mode  fill_mode,
                         rocsparse_index_base base,
                         I*                   struct_pivot,
                         I*                   numeric_pivot)
{
    // Branch selected by a type test combined with an upper bound on nnz
    if(std::is_same() && nnz < std::numeric_limits::max())
    {
        // Build the row pointer array from the COO row indices
        std::vector csr_row_ptr(M + 1);

        host_coo_to_csr(M, nnz, coo_row_ind, csr_row_ptr, base);

        // Solve using the CSR host reference
        host_csrsm(M,
                   nrhs,
                   nnz,
                   transA,
                   transB,
                   alpha,
                   csr_row_ptr.data(),
                   coo_col_ind,
                   coo_val,
                   B,
                   ldb,
                   diag_type,
                   fill_mode,
                   base,
                   struct_pivot,
                   numeric_pivot);
    }
    else
    {
        // Same sequence of steps as above
        std::vector csr_row_ptr(M + 1);

        host_coo_to_csr(M, nnz, coo_row_ind, csr_row_ptr, base);

        host_csrsm(M,
                   nrhs,
                   nnz,
                   transA,
                   transB,
                   alpha,
                   csr_row_ptr.data(),
                   coo_col_ind,
                   coo_val,
                   B,
                   ldb,
                   diag_type,
                   fill_mode,
                   base,
                   struct_pivot,
                   numeric_pivot);
    }
}
diag_type, base, struct_pivot, numeric_pivot); } } else if(transA == rocsparse_operation_transpose) { // Transpose matrix std::vector bsrt_row_ptr(mb + 1); std::vector bsrt_col_ind(nnzb); std::vector bsrt_val(nnzb * bsr_dim * bsr_dim); host_bsr_to_bsc(mb, mb, nnzb, bsr_dim, bsr_row_ptr, bsr_col_ind, bsr_val, bsrt_col_ind, bsrt_row_ptr, bsrt_val, base, base); if(fill_mode == rocsparse_fill_mode_lower) { host_bsr_usolve(dir, transX, mb, nrhs, alpha, bsrt_row_ptr.data(), bsrt_col_ind.data(), bsrt_val.data(), bsr_dim, B, ldb, X, ldx, diag_type, base, struct_pivot, numeric_pivot); } else { host_bsr_lsolve(dir, transX, mb, nrhs, alpha, bsrt_row_ptr.data(), bsrt_col_ind.data(), bsrt_val.data(), bsr_dim, B, ldb, X, ldx, diag_type, base, struct_pivot, numeric_pivot); } } *numeric_pivot = std::min(*numeric_pivot, *struct_pivot); *struct_pivot = (*struct_pivot == mb + 1) ? -1 : *struct_pivot; *numeric_pivot = (*numeric_pivot == mb + 1) ? -1 : *numeric_pivot; } template void host_gemvi(I M, I N, T alpha, const T* A, I lda, I nnz, const T* x_val, const I* x_ind, T beta, T* y, rocsparse_index_base base) { #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(I i = 0; i < M; ++i) { T sum = static_cast(0); for(I j = 0; j < nnz; ++j) { sum = std::fma(x_val[j], A[x_ind[j] * lda + i], sum); } y[i] = std::fma(alpha, sum, beta * y[i]); } } template void host_gemmi(rocsparse_int M, rocsparse_int N, rocsparse_operation transA, rocsparse_operation transB, T alpha, const T* A, rocsparse_int lda, const rocsparse_int* csr_row_ptr, const rocsparse_int* csr_col_ind, const T* csr_val, T beta, T* C, rocsparse_int ldc, rocsparse_index_base base) { if(transB == rocsparse_operation_transpose) { for(rocsparse_int i = 0; i < M; ++i) { for(rocsparse_int j = 0; j < N; ++j) { T sum = static_cast(0); rocsparse_int row_begin = csr_row_ptr[j] - base; rocsparse_int row_end = csr_row_ptr[j + 1] - base; for(rocsparse_int k = row_begin; k < row_end; ++k) { rocsparse_int col_B = 
csr_col_ind[k] - base; T val_B = csr_val[k]; T val_A = A[col_B * lda + i]; sum = std::fma(val_A, val_B, sum); } C[j * ldc + i] = std::fma(beta, C[j * ldc + i], alpha * sum); } } } } /* * =========================================================================== * extra SPARSE * =========================================================================== */ template void host_bsrgemm_nnzb(J Mb, J Nb, J Kb, J block_dim, const T* alpha, const I* bsr_row_ptr_A, const J* bsr_col_ind_A, const I* bsr_row_ptr_B, const J* bsr_col_ind_B, const T* beta, const I* bsr_row_ptr_D, const J* bsr_col_ind_D, I* bsr_row_ptr_C, I* nnzb_C, rocsparse_index_base base_A, rocsparse_index_base base_B, rocsparse_index_base base_C, rocsparse_index_base base_D) { return host_csrgemm_nnz(Mb, Nb, Kb, alpha, bsr_row_ptr_A, bsr_col_ind_A, bsr_row_ptr_B, bsr_col_ind_B, beta, bsr_row_ptr_D, bsr_col_ind_D, bsr_row_ptr_C, nnzb_C, base_A, base_B, base_C, base_D); } template void host_bsrgemm(rocsparse_direction dir, J Mb, J Nb, J Kb, J block_dim, const T* alpha, const I* bsr_row_ptr_A, const J* bsr_col_ind_A, const T* bsr_val_A, const I* bsr_row_ptr_B, const J* bsr_col_ind_B, const T* bsr_val_B, const T* beta, const I* bsr_row_ptr_D, const J* bsr_col_ind_D, const T* bsr_val_D, const I* bsr_row_ptr_C, J* bsr_col_ind_C, T* bsr_val_C, rocsparse_index_base base_A, rocsparse_index_base base_B, rocsparse_index_base base_C, rocsparse_index_base base_D) { if(Mb == 0 || Nb == 0) { return; } else if(alpha && !beta && (Kb == 0)) { return; } else if(!alpha && !beta) { return; } #ifdef _OPENMP #pragma omp parallel #endif { std::vector nnzb(Nb, -1); int nthreads = 1; int tid = 0; #ifdef _OPENMP nthreads = omp_get_num_threads(); tid = omp_get_thread_num(); #endif J rows_per_thread = (Mb + nthreads - 1) / nthreads; J chunk_begin = rows_per_thread * tid; J chunk_end = std::min(chunk_begin + rows_per_thread, Mb); // Loop over rows of A for(J i = chunk_begin; i < chunk_end; ++i) { I row_begin_C = bsr_row_ptr_C[i] - 
base_C; I row_end_C = row_begin_C; if(alpha) { I row_begin_A = bsr_row_ptr_A[i] - base_A; I row_end_A = bsr_row_ptr_A[i + 1] - base_A; // Loop over columns of A for(I j = row_begin_A; j < row_end_A; ++j) { // Current column of A J col_A = bsr_col_ind_A[j] - base_A; I row_begin_B = bsr_row_ptr_B[col_A] - base_B; I row_end_B = bsr_row_ptr_B[col_A + 1] - base_B; // Loop over columns of B in row col_A for(I k = row_begin_B; k < row_end_B; ++k) { // Current column of B J col_B = bsr_col_ind_B[k] - base_B; // Check if a new nnzb is generated or if the product is appended if(nnzb[col_B] < row_begin_C) { nnzb[col_B] = row_end_C; bsr_col_ind_C[row_end_C] = col_B + base_C; for(J r = 0; r < block_dim; r++) { for(J c = 0; c < block_dim; c++) { T val_C = static_cast(0); if(dir == rocsparse_direction_row) { for(J a = 0; a < block_dim; a++) { val_C = std::fma(bsr_val_A[block_dim * block_dim * j + block_dim * r + a], bsr_val_B[block_dim * block_dim * k + block_dim * a + c], val_C); } bsr_val_C[block_dim * block_dim * row_end_C + block_dim * r + c] = *alpha * val_C; } else { for(J a = 0; a < block_dim; a++) { val_C = std::fma(bsr_val_A[block_dim * block_dim * j + block_dim * a + r], bsr_val_B[block_dim * block_dim * k + block_dim * c + a], val_C); } bsr_val_C[block_dim * block_dim * row_end_C + block_dim * c + r] = *alpha * val_C; } } } ++row_end_C; } else { for(J r = 0; r < block_dim; r++) { for(J c = 0; c < block_dim; c++) { T val_C = static_cast(0); if(dir == rocsparse_direction_row) { for(J a = 0; a < block_dim; a++) { val_C = std::fma(bsr_val_A[block_dim * block_dim * j + block_dim * r + a], bsr_val_B[block_dim * block_dim * k + block_dim * a + c], val_C); } bsr_val_C[block_dim * block_dim * nnzb[col_B] + block_dim * r + c] = std::fma(*alpha, val_C, bsr_val_C[block_dim * block_dim * nnzb[col_B] + block_dim * r + c]); } else { for(J a = 0; a < block_dim; a++) { val_C = std::fma(bsr_val_A[block_dim * block_dim * j + block_dim * a + r], bsr_val_B[block_dim * block_dim * k + 
block_dim * c + a], val_C); } bsr_val_C[block_dim * block_dim * nnzb[col_B] + block_dim * c + r] = std::fma(*alpha, val_C, bsr_val_C[block_dim * block_dim * nnzb[col_B] + block_dim * c + r]); } } } } } } } // Add nnzb of D if beta != 0 if(beta) { I row_begin_D = bsr_row_ptr_D[i] - base_D; I row_end_D = bsr_row_ptr_D[i + 1] - base_D; // Loop over columns of D for(I j = row_begin_D; j < row_end_D; ++j) { // Current column of D J col_D = bsr_col_ind_D[j] - base_D; // Current value of D // Check if a new nnzb is generated or if the value is added if(nnzb[col_D] < row_begin_C) { nnzb[col_D] = row_end_C; bsr_col_ind_C[row_end_C] = col_D + base_C; for(J r = 0; r < block_dim; r++) { for(J c = 0; c < block_dim; c++) { if(dir == rocsparse_direction_row) { bsr_val_C[block_dim * block_dim * row_end_C + block_dim * r + c] = *beta * bsr_val_D[block_dim * block_dim * j + block_dim * r + c]; } else { bsr_val_C[block_dim * block_dim * row_end_C + block_dim * c + r] = *beta * bsr_val_D[block_dim * block_dim * j + block_dim * c + r]; } } } ++row_end_C; } else { for(J r = 0; r < block_dim; r++) { for(J c = 0; c < block_dim; c++) { if(dir == rocsparse_direction_row) { bsr_val_C[block_dim * block_dim * nnzb[col_D] + block_dim * r + c] = std::fma(*beta, bsr_val_D[block_dim * block_dim * j + block_dim * r + c], bsr_val_C[block_dim * block_dim * nnzb[col_D] + block_dim * r + c]); } else { bsr_val_C[block_dim * block_dim * nnzb[col_D] + block_dim * c + r] = std::fma(*beta, bsr_val_D[block_dim * block_dim * j + block_dim * c + r], bsr_val_C[block_dim * block_dim * nnzb[col_D] + block_dim * c + r]); } } } } } } } } I nnzb = bsr_row_ptr_C[Mb] - base_C; std::vector col(nnzb); std::vector val(block_dim * block_dim * nnzb); memcpy(col.data(), bsr_col_ind_C, sizeof(J) * nnzb); memcpy(val.data(), bsr_val_C, sizeof(T) * block_dim * block_dim * nnzb); #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(J i = 0; i < Mb; ++i) { I row_begin = bsr_row_ptr_C[i] - base_C; I row_end = 
bsr_row_ptr_C[i + 1] - base_C; J row_nnzb = row_end - row_begin; std::vector perm(row_nnzb); for(J j = 0; j < row_nnzb; ++j) { perm[j] = j; } J* col_entry = &col[row_begin]; T* val_entry = &val[block_dim * block_dim * row_begin]; std::sort(perm.begin(), perm.end(), [&](const J& a, const J& b) { return col_entry[a] <= col_entry[b]; }); for(J j = 0; j < row_nnzb; ++j) { bsr_col_ind_C[row_begin + j] = col_entry[perm[j]]; for(J r = 0; r < block_dim; r++) { for(J c = 0; c < block_dim; c++) { if(dir == rocsparse_direction_row) { bsr_val_C[block_dim * block_dim * (row_begin + j) + block_dim * r + c] = val_entry[block_dim * block_dim * perm[j] + block_dim * r + c]; } else { bsr_val_C[block_dim * block_dim * (row_begin + j) + block_dim * c + r] = val_entry[block_dim * block_dim * perm[j] + block_dim * c + r]; } } } } } } template void host_bsrgeam_nnzb(rocsparse_direction dir, rocsparse_int Mb, rocsparse_int Nb, rocsparse_int block_dim, T alpha, const rocsparse_int* bsr_row_ptr_A, const rocsparse_int* bsr_col_ind_A, T beta, const rocsparse_int* bsr_row_ptr_B, const rocsparse_int* bsr_col_ind_B, rocsparse_int* bsr_row_ptr_C, rocsparse_int* nnzb_C, rocsparse_index_base base_A, rocsparse_index_base base_B, rocsparse_index_base base_C) { return host_csrgeam_nnz(Mb, Nb, alpha, bsr_row_ptr_A, bsr_col_ind_A, beta, bsr_row_ptr_B, bsr_col_ind_B, bsr_row_ptr_C, nnzb_C, base_A, base_B, base_C); } template void host_bsrgeam(rocsparse_direction dir, rocsparse_int Mb, rocsparse_int Nb, rocsparse_int block_dim, T alpha, const rocsparse_int* bsr_row_ptr_A, const rocsparse_int* bsr_col_ind_A, const T* bsr_val_A, T beta, const rocsparse_int* bsr_row_ptr_B, const rocsparse_int* bsr_col_ind_B, const T* bsr_val_B, const rocsparse_int* bsr_row_ptr_C, rocsparse_int* bsr_col_ind_C, T* bsr_val_C, rocsparse_index_base base_A, rocsparse_index_base base_B, rocsparse_index_base base_C) { #ifdef _OPENMP #pragma omp parallel #endif { std::vector nnzb(Nb, -1); #ifdef _OPENMP rocsparse_int nthreads = 
omp_get_num_threads(); rocsparse_int tid = omp_get_thread_num(); #else rocsparse_int nthreads = 1; rocsparse_int tid = 0; #endif rocsparse_int rows_per_thread = (Mb + nthreads - 1) / nthreads; rocsparse_int chunk_begin = rows_per_thread * tid; rocsparse_int chunk_end = std::min(chunk_begin + rows_per_thread, Mb); // Loop over rows for(rocsparse_int i = chunk_begin; i < chunk_end; ++i) { rocsparse_int row_begin_C = bsr_row_ptr_C[i] - base_C; rocsparse_int row_end_C = row_begin_C; rocsparse_int row_begin_A = bsr_row_ptr_A[i] - base_A; rocsparse_int row_end_A = bsr_row_ptr_A[i + 1] - base_A; // Copy A into C for(rocsparse_int j = row_begin_A; j < row_end_A; ++j) { // Current column of A rocsparse_int col_A = bsr_col_ind_A[j] - base_A; nnzb[col_A] = row_end_C; bsr_col_ind_C[row_end_C] = col_A + base_C; for(rocsparse_int r = 0; r < block_dim; r++) { for(rocsparse_int c = 0; c < block_dim; c++) { if(dir == rocsparse_direction_row) { bsr_val_C[block_dim * block_dim * row_end_C + block_dim * r + c] = alpha * bsr_val_A[block_dim * block_dim * j + block_dim * r + c]; } else { bsr_val_C[block_dim * block_dim * row_end_C + block_dim * c + r] = alpha * bsr_val_A[block_dim * block_dim * j + block_dim * c + r]; } } } ++row_end_C; } rocsparse_int row_begin_B = bsr_row_ptr_B[i] - base_B; rocsparse_int row_end_B = bsr_row_ptr_B[i + 1] - base_B; // Loop over columns of B for(rocsparse_int j = row_begin_B; j < row_end_B; ++j) { // Current column of B rocsparse_int col_B = bsr_col_ind_B[j] - base_B; // Check if a new nnz is generated or if the value is added if(nnzb[col_B] < row_begin_C) { nnzb[col_B] = row_end_C; bsr_col_ind_C[row_end_C] = col_B + base_C; for(rocsparse_int r = 0; r < block_dim; r++) { for(rocsparse_int c = 0; c < block_dim; c++) { if(dir == rocsparse_direction_row) { bsr_val_C[block_dim * block_dim * row_end_C + block_dim * r + c] = beta * bsr_val_B[block_dim * block_dim * j + block_dim * r + c]; } else { bsr_val_C[block_dim * block_dim * row_end_C + block_dim * c + 
r] = beta * bsr_val_B[block_dim * block_dim * j + block_dim * c + r]; } } } ++row_end_C; } else { for(rocsparse_int r = 0; r < block_dim; r++) { for(rocsparse_int c = 0; c < block_dim; c++) { if(dir == rocsparse_direction_row) { bsr_val_C[block_dim * block_dim * nnzb[col_B] + block_dim * r + c] = std::fma( beta, bsr_val_B[block_dim * block_dim * j + block_dim * r + c], bsr_val_C[block_dim * block_dim * nnzb[col_B] + block_dim * r + c]); } else { bsr_val_C[block_dim * block_dim * nnzb[col_B] + block_dim * c + r] = std::fma( beta, bsr_val_B[block_dim * block_dim * j + block_dim * c + r], bsr_val_C[block_dim * block_dim * nnzb[col_B] + block_dim * c + r]); } } } } } } } rocsparse_int nnzb = bsr_row_ptr_C[Mb] - base_C; std::vector col(nnzb); std::vector val(block_dim * block_dim * nnzb); std::copy(bsr_col_ind_C, bsr_col_ind_C + nnzb, col.begin()); std::copy(bsr_val_C, bsr_val_C + block_dim * block_dim * nnzb, val.begin()); #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int i = 0; i < Mb; ++i) { rocsparse_int row_begin = bsr_row_ptr_C[i] - base_C; rocsparse_int row_end = bsr_row_ptr_C[i + 1] - base_C; rocsparse_int row_nnzb = row_end - row_begin; std::vector perm(row_nnzb); for(rocsparse_int j = 0; j < row_nnzb; ++j) { perm[j] = j; } rocsparse_int* col_entry = &col[row_begin]; T* val_entry = &val[block_dim * block_dim * row_begin]; std::sort(perm.begin(), perm.end(), [&](const rocsparse_int& a, const rocsparse_int& b) { return col_entry[a] <= col_entry[b]; }); for(rocsparse_int j = 0; j < row_nnzb; ++j) { bsr_col_ind_C[row_begin + j] = col_entry[perm[j]]; for(rocsparse_int r = 0; r < block_dim; r++) { for(rocsparse_int c = 0; c < block_dim; c++) { if(dir == rocsparse_direction_row) { bsr_val_C[block_dim * block_dim * (row_begin + j) + block_dim * r + c] = val_entry[block_dim * block_dim * perm[j] + block_dim * r + c]; } else { bsr_val_C[block_dim * block_dim * (row_begin + j) + block_dim * c + r] = val_entry[block_dim * block_dim * 
perm[j] + block_dim * c + r]; } } } } } } template void host_csrgeam_nnz(rocsparse_int M, rocsparse_int N, T alpha, const rocsparse_int* csr_row_ptr_A, const rocsparse_int* csr_col_ind_A, T beta, const rocsparse_int* csr_row_ptr_B, const rocsparse_int* csr_col_ind_B, rocsparse_int* csr_row_ptr_C, rocsparse_int* nnz_C, rocsparse_index_base base_A, rocsparse_index_base base_B, rocsparse_index_base base_C) { #ifdef _OPENMP #pragma omp parallel #endif { std::vector nnz(N, -1); #ifdef _OPENMP rocsparse_int nthreads = omp_get_num_threads(); rocsparse_int tid = omp_get_thread_num(); #else rocsparse_int nthreads = 1; rocsparse_int tid = 0; #endif rocsparse_int rows_per_thread = (M + nthreads - 1) / nthreads; rocsparse_int chunk_begin = rows_per_thread * tid; rocsparse_int chunk_end = std::min(chunk_begin + rows_per_thread, M); // Index base csr_row_ptr_C[0] = base_C; // Loop over rows for(rocsparse_int i = chunk_begin; i < chunk_end; ++i) { // Initialize csr row pointer with previous row offset csr_row_ptr_C[i + 1] = 0; rocsparse_int row_begin_A = csr_row_ptr_A[i] - base_A; rocsparse_int row_end_A = csr_row_ptr_A[i + 1] - base_A; // Loop over columns of A for(rocsparse_int j = row_begin_A; j < row_end_A; ++j) { rocsparse_int col_A = csr_col_ind_A[j] - base_A; nnz[col_A] = i; ++csr_row_ptr_C[i + 1]; } rocsparse_int row_begin_B = csr_row_ptr_B[i] - base_B; rocsparse_int row_end_B = csr_row_ptr_B[i + 1] - base_B; // Loop over columns of B for(rocsparse_int j = row_begin_B; j < row_end_B; ++j) { rocsparse_int col_B = csr_col_ind_B[j] - base_B; // Check if a new nnz is generated if(nnz[col_B] != i) { nnz[col_B] = i; ++csr_row_ptr_C[i + 1]; } } } } // Scan to obtain row offsets for(rocsparse_int i = 0; i < M; ++i) { csr_row_ptr_C[i + 1] += csr_row_ptr_C[i]; } *nnz_C = csr_row_ptr_C[M] - base_C; } template void host_csrgeam(rocsparse_int M, rocsparse_int N, T alpha, const rocsparse_int* csr_row_ptr_A, const rocsparse_int* csr_col_ind_A, const T* csr_val_A, T beta, const 
rocsparse_int* csr_row_ptr_B, const rocsparse_int* csr_col_ind_B, const T* csr_val_B, const rocsparse_int* csr_row_ptr_C, rocsparse_int* csr_col_ind_C, T* csr_val_C, rocsparse_index_base base_A, rocsparse_index_base base_B, rocsparse_index_base base_C) { #ifdef _OPENMP #pragma omp parallel #endif { std::vector nnz(N, -1); #ifdef _OPENMP rocsparse_int nthreads = omp_get_num_threads(); rocsparse_int tid = omp_get_thread_num(); #else rocsparse_int nthreads = 1; rocsparse_int tid = 0; #endif rocsparse_int rows_per_thread = (M + nthreads - 1) / nthreads; rocsparse_int chunk_begin = rows_per_thread * tid; rocsparse_int chunk_end = std::min(chunk_begin + rows_per_thread, M); // Loop over rows for(rocsparse_int i = chunk_begin; i < chunk_end; ++i) { rocsparse_int row_begin_C = csr_row_ptr_C[i] - base_C; rocsparse_int row_end_C = row_begin_C; rocsparse_int row_begin_A = csr_row_ptr_A[i] - base_A; rocsparse_int row_end_A = csr_row_ptr_A[i + 1] - base_A; // Copy A into C for(rocsparse_int j = row_begin_A; j < row_end_A; ++j) { // Current column of A rocsparse_int col_A = csr_col_ind_A[j] - base_A; // Current value of A T val_A = alpha * csr_val_A[j]; nnz[col_A] = row_end_C; csr_col_ind_C[row_end_C] = col_A + base_C; csr_val_C[row_end_C] = val_A; ++row_end_C; } rocsparse_int row_begin_B = csr_row_ptr_B[i] - base_B; rocsparse_int row_end_B = csr_row_ptr_B[i + 1] - base_B; // Loop over columns of B for(rocsparse_int j = row_begin_B; j < row_end_B; ++j) { // Current column of B rocsparse_int col_B = csr_col_ind_B[j] - base_B; // Current value of B T val_B = beta * csr_val_B[j]; // Check if a new nnz is generated or if the value is added if(nnz[col_B] < row_begin_C) { nnz[col_B] = row_end_C; csr_col_ind_C[row_end_C] = col_B + base_C; csr_val_C[row_end_C] = val_B; ++row_end_C; } else { csr_val_C[nnz[col_B]] += val_B; } } } } rocsparse_int nnz = csr_row_ptr_C[M] - base_C; std::vector col(nnz); std::vector val(nnz); std::copy(csr_col_ind_C, csr_col_ind_C + nnz, col.begin()); 
std::copy(csr_val_C, csr_val_C + nnz, val.begin()); #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int i = 0; i < M; ++i) { rocsparse_int row_begin = csr_row_ptr_C[i] - base_C; rocsparse_int row_end = csr_row_ptr_C[i + 1] - base_C; rocsparse_int row_nnz = row_end - row_begin; std::vector perm(row_nnz); for(rocsparse_int j = 0; j < row_nnz; ++j) { perm[j] = j; } rocsparse_int* col_entry = col.data() + row_begin; T* val_entry = val.data() + row_begin; std::sort(perm.begin(), perm.end(), [&](const rocsparse_int& a, const rocsparse_int& b) { return col_entry[a] <= col_entry[b]; }); for(rocsparse_int j = 0; j < row_nnz; ++j) { csr_col_ind_C[row_begin + j] = col_entry[perm[j]]; csr_val_C[row_begin + j] = val_entry[perm[j]]; } } } template void host_csrgemm_nnz(J M, J N, J K, const T* alpha, const I* csr_row_ptr_A, const J* csr_col_ind_A, const I* csr_row_ptr_B, const J* csr_col_ind_B, const T* beta, const I* csr_row_ptr_D, const J* csr_col_ind_D, I* csr_row_ptr_C, I* nnz_C, rocsparse_index_base base_A, rocsparse_index_base base_B, rocsparse_index_base base_C, rocsparse_index_base base_D) { if(M == 0 || N == 0) { *nnz_C = 0; if(M > 0) { for(J i = 0; i <= M; ++i) { csr_row_ptr_C[i] = base_C; } } return; } else if(alpha && !beta && (K == 0)) { *nnz_C = 0; if(M > 0) { for(J i = 0; i <= M; ++i) { csr_row_ptr_C[i] = base_C; } } return; } else if(!alpha && !beta) { *nnz_C = 0; if(M > 0) { for(J i = 0; i <= M; ++i) { csr_row_ptr_C[i] = base_C; } } return; } #ifdef _OPENMP #pragma omp parallel #endif { std::vector nnz(N, -1); int nthreads = 1; int tid = 0; #ifdef _OPENMP nthreads = omp_get_num_threads(); tid = omp_get_thread_num(); #endif J rows_per_thread = (M + nthreads - 1) / nthreads; J chunk_begin = rows_per_thread * tid; J chunk_end = std::min(chunk_begin + rows_per_thread, M); // Index base csr_row_ptr_C[0] = base_C; // Loop over rows of A for(J i = chunk_begin; i < chunk_end; ++i) { // Initialize csr row pointer with previous row 
offset csr_row_ptr_C[i + 1] = 0; if(alpha) { I row_begin_A = csr_row_ptr_A[i] - base_A; I row_end_A = csr_row_ptr_A[i + 1] - base_A; // Loop over columns of A for(I j = row_begin_A; j < row_end_A; ++j) { // Current column of A J col_A = csr_col_ind_A[j] - base_A; I row_begin_B = csr_row_ptr_B[col_A] - base_B; I row_end_B = csr_row_ptr_B[col_A + 1] - base_B; // Loop over columns of B in row col_A for(I k = row_begin_B; k < row_end_B; ++k) { // Current column of B J col_B = csr_col_ind_B[k] - base_B; // Check if a new nnz is generated if(nnz[col_B] != i) { nnz[col_B] = i; ++csr_row_ptr_C[i + 1]; } } } } // Add nnz of D if beta != 0 if(beta) { I row_begin_D = csr_row_ptr_D[i] - base_D; I row_end_D = csr_row_ptr_D[i + 1] - base_D; // Loop over columns of D for(I j = row_begin_D; j < row_end_D; ++j) { J col_D = csr_col_ind_D[j] - base_D; // Check if a new nnz is generated if(nnz[col_D] != i) { nnz[col_D] = i; ++csr_row_ptr_C[i + 1]; } } } } } // Scan to obtain row offsets for(J i = 0; i < M; ++i) { csr_row_ptr_C[i + 1] += csr_row_ptr_C[i]; } *nnz_C = csr_row_ptr_C[M] - base_C; } template void host_csrgemm(J M, J N, J L, const T* alpha, const I* csr_row_ptr_A, const J* csr_col_ind_A, const T* csr_val_A, const I* csr_row_ptr_B, const J* csr_col_ind_B, const T* csr_val_B, const T* beta, const I* csr_row_ptr_D, const J* csr_col_ind_D, const T* csr_val_D, const I* csr_row_ptr_C, J* csr_col_ind_C, T* csr_val_C, rocsparse_index_base base_A, rocsparse_index_base base_B, rocsparse_index_base base_C, rocsparse_index_base base_D) { if(M == 0 || N == 0) { return; } else if(alpha && !beta && (L == 0)) { return; } else if(!alpha && !beta) { return; } #ifdef _OPENMP #pragma omp parallel #endif { std::vector nnz(N, -1); int nthreads = 1; int tid = 0; #ifdef _OPENMP nthreads = omp_get_num_threads(); tid = omp_get_thread_num(); #endif J rows_per_thread = (M + nthreads - 1) / nthreads; J chunk_begin = rows_per_thread * tid; J chunk_end = std::min(chunk_begin + rows_per_thread, M); // Loop 
over rows of A for(J i = chunk_begin; i < chunk_end; ++i) { I row_begin_C = csr_row_ptr_C[i] - base_C; I row_end_C = row_begin_C; if(alpha) { I row_begin_A = csr_row_ptr_A[i] - base_A; I row_end_A = csr_row_ptr_A[i + 1] - base_A; // Loop over columns of A for(I j = row_begin_A; j < row_end_A; ++j) { // Current column of A J col_A = csr_col_ind_A[j] - base_A; // Current value of A T val_A = *alpha * csr_val_A[j]; I row_begin_B = csr_row_ptr_B[col_A] - base_B; I row_end_B = csr_row_ptr_B[col_A + 1] - base_B; // Loop over columns of B in row col_A for(I k = row_begin_B; k < row_end_B; ++k) { // Current column of B J col_B = csr_col_ind_B[k] - base_B; // Current value of B T val_B = csr_val_B[k]; // Check if a new nnz is generated or if the product is appended if(nnz[col_B] < row_begin_C) { nnz[col_B] = row_end_C; csr_col_ind_C[row_end_C] = col_B + base_C; csr_val_C[row_end_C] = val_A * val_B; ++row_end_C; } else { csr_val_C[nnz[col_B]] += val_A * val_B; } } } } // Add nnz of D if beta != 0 if(beta) { I row_begin_D = csr_row_ptr_D[i] - base_D; I row_end_D = csr_row_ptr_D[i + 1] - base_D; // Loop over columns of D for(I j = row_begin_D; j < row_end_D; ++j) { // Current column of D J col_D = csr_col_ind_D[j] - base_D; // Current value of D T val_D = *beta * csr_val_D[j]; // Check if a new nnz is generated or if the value is added if(nnz[col_D] < row_begin_C) { nnz[col_D] = row_end_C; csr_col_ind_C[row_end_C] = col_D + base_C; csr_val_C[row_end_C] = val_D; ++row_end_C; } else { csr_val_C[nnz[col_D]] += val_D; } } } } } I nnz = csr_row_ptr_C[M] - base_C; std::vector col(nnz); std::vector val(nnz); memcpy(col.data(), csr_col_ind_C, sizeof(J) * nnz); memcpy(val.data(), csr_val_C, sizeof(T) * nnz); #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(J i = 0; i < M; ++i) { I row_begin = csr_row_ptr_C[i] - base_C; I row_end = csr_row_ptr_C[i + 1] - base_C; J row_nnz = row_end - row_begin; std::vector perm(row_nnz); for(J j = 0; j < row_nnz; ++j) { perm[j] 
= j; } J* col_entry = col.data() + row_begin; T* val_entry = val.data() + row_begin; std::sort(perm.begin(), perm.end(), [&](const J& a, const J& b) { return col_entry[a] <= col_entry[b]; }); for(J j = 0; j < row_nnz; ++j) { csr_col_ind_C[row_begin + j] = col_entry[perm[j]]; csr_val_C[row_begin + j] = val_entry[perm[j]]; } } } template void rocsparse_host::cooddmm(rocsparse_operation transA, rocsparse_operation transB, rocsparse_order orderA, rocsparse_order orderB, J M, J N, J K, I nnz, const T* alpha, const T* A, J lda, const T* B, J ldb, const T* beta, const I* coo_row_ind_C, const I* coo_col_ind_C, T* coo_val_C, rocsparse_index_base base_C) { const T a = *alpha; const T b = *beta; const J incx = (orderA == rocsparse_order_column) ? ((transA == rocsparse_operation_none) ? lda : 1) : ((transA == rocsparse_operation_none) ? 1 : lda); const J incy = (orderB == rocsparse_order_column) ? ((transB == rocsparse_operation_none) ? 1 : ldb) : ((transB == rocsparse_operation_none) ? ldb : 1); #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(I s = 0; s < nnz; ++s) { const I i = coo_row_ind_C[s] - base_C; const I j = coo_col_ind_C[s] - base_C; const T* x = (orderA == rocsparse_order_column) ? ((transA == rocsparse_operation_none) ? (A + i) : (A + lda * i)) : ((transA == rocsparse_operation_none) ? (A + lda * i) : (A + i)); const T* y = (orderB == rocsparse_order_column) ? ((transB == rocsparse_operation_none) ? (B + ldb * j) : (B + j)) : ((transB == rocsparse_operation_none) ? 
(B + j) : (B + ldb * j)); T sum = static_cast(0); for(J k = 0; k < K; ++k) { sum += x[incx * k] * y[incy * k]; } coo_val_C[s] = coo_val_C[s] * b + a * sum; } } template void rocsparse_host::cooaosddmm(rocsparse_operation transA, rocsparse_operation transB, rocsparse_order orderA, rocsparse_order orderB, J M, J N, J K, I nnz, const T* alpha, const T* A, J lda, const T* B, J ldb, const T* beta, const I* coo_row_ind_C, const I* coo_col_ind_C, T* coo_val_C, rocsparse_index_base base_C) { const T a = *alpha; const T b = *beta; const J incx = (orderA == rocsparse_order_column) ? ((transA == rocsparse_operation_none) ? lda : 1) : ((transA == rocsparse_operation_none) ? 1 : lda); const J incy = (orderB == rocsparse_order_column) ? ((transB == rocsparse_operation_none) ? 1 : ldb) : ((transB == rocsparse_operation_none) ? ldb : 1); #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(I s = 0; s < nnz; ++s) { const I i = coo_row_ind_C[2 * s] - base_C; const I j = coo_col_ind_C[2 * s] - base_C; const T* x = (orderA == rocsparse_order_column) ? ((transA == rocsparse_operation_none) ? (A + i) : (A + lda * i)) : ((transA == rocsparse_operation_none) ? (A + lda * i) : (A + i)); const T* y = (orderB == rocsparse_order_column) ? ((transB == rocsparse_operation_none) ? (B + ldb * j) : (B + j)) : ((transB == rocsparse_operation_none) ? (B + j) : (B + ldb * j)); T sum = static_cast(0); for(J k = 0; k < K; ++k) { sum += x[incx * k] * y[incy * k]; } coo_val_C[s] = coo_val_C[s] * b + a * sum; } } template void rocsparse_host::csrddmm(rocsparse_operation transA, rocsparse_operation transB, rocsparse_order orderA, rocsparse_order orderB, J M, J N, J K, I nnz, const T* alpha, const T* A, J lda, const T* B, J ldb, const T* beta, const I* csr_row_ptr_C, const J* csr_col_ind_C, T* csr_val_C, rocsparse_index_base base_C) { const T a = *alpha; const T b = *beta; const J incx = (orderA == rocsparse_order_column) ? ((transA == rocsparse_operation_none) ? 
lda : 1) : ((transA == rocsparse_operation_none) ? 1 : lda); const J incy = (orderB == rocsparse_order_column) ? ((transB == rocsparse_operation_none) ? 1 : ldb) : ((transB == rocsparse_operation_none) ? ldb : 1); #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(J i = 0; i < M; ++i) { for(I at = csr_row_ptr_C[i] - base_C; at < csr_row_ptr_C[i + 1] - base_C; ++at) { J j = csr_col_ind_C[at] - base_C; const T* x = (orderA == rocsparse_order_column) ? ((transA == rocsparse_operation_none) ? (A + i) : (A + lda * i)) : ((transA == rocsparse_operation_none) ? (A + lda * i) : (A + i)); const T* y = (orderB == rocsparse_order_column) ? ((transB == rocsparse_operation_none) ? (B + ldb * j) : (B + j)) : ((transB == rocsparse_operation_none) ? (B + j) : (B + ldb * j)); T sum = static_cast(0); for(J k = 0; k < K; ++k) { sum += x[incx * k] * y[incy * k]; } csr_val_C[at] = csr_val_C[at] * b + a * sum; } } } template void rocsparse_host::ellddmm(rocsparse_operation transA, rocsparse_operation transB, rocsparse_order orderA, rocsparse_order orderB, J M, J N, J K, I nnz, const T* alpha, const T* A, J lda, const T* B, J ldb, const T* beta, const J ell_width, const I* ell_ind_C, T* ell_val_C, rocsparse_index_base ell_base) { const T a = *alpha; const T b = *beta; const J incx = (orderA == rocsparse_order_column) ? ((transA == rocsparse_operation_none) ? lda : 1) : ((transA == rocsparse_operation_none) ? 1 : lda); const J incy = (orderB == rocsparse_order_column) ? ((transB == rocsparse_operation_none) ? 1 : ldb) : ((transB == rocsparse_operation_none) ? ldb : 1); #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(J i = 0; i < M; ++i) { for(J p = 0; p < ell_width; ++p) { I at = p * M + i; J j = ell_ind_C[at] - ell_base; if(j >= 0 && j < N) { const T* x = (orderA == rocsparse_order_column) ? ((transA == rocsparse_operation_none) ? (A + i) : (A + lda * i)) : ((transA == rocsparse_operation_none) ? 
(A + lda * i) : (A + i)); const T* y = (orderB == rocsparse_order_column) ? ((transB == rocsparse_operation_none) ? (B + ldb * j) : (B + j)) : ((transB == rocsparse_operation_none) ? (B + j) : (B + ldb * j)); T sum = static_cast(0); for(J k = 0; k < K; ++k) { sum += x[incx * k] * y[incy * k]; } ell_val_C[at] = ell_val_C[at] * b + a * sum; } } } } template void rocsparse_host::cscddmm(rocsparse_operation transA, rocsparse_operation transB, rocsparse_order orderA, rocsparse_order orderB, J M, J N, J K, I nnz, const T* alpha, const T* A, J lda, const T* B, J ldb, const T* beta, const I* csr_ptr_C, const J* csr_ind_C, T* csr_val_C, rocsparse_index_base base_C) { const T a = *alpha; const T b = *beta; const J incx = (orderA == rocsparse_order_column) ? ((transA == rocsparse_operation_none) ? lda : 1) : ((transA == rocsparse_operation_none) ? 1 : lda); const J incy = (orderB == rocsparse_order_column) ? ((transB == rocsparse_operation_none) ? 1 : ldb) : ((transB == rocsparse_operation_none) ? ldb : 1); #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(J j = 0; j < N; ++j) { for(I at = csr_ptr_C[j] - base_C; at < csr_ptr_C[j + 1] - base_C; ++at) { J i = csr_ind_C[at] - base_C; const T* x = (orderA == rocsparse_order_column) ? ((transA == rocsparse_operation_none) ? (A + i) : (A + lda * i)) : ((transA == rocsparse_operation_none) ? (A + lda * i) : (A + i)); const T* y = (orderB == rocsparse_order_column) ? ((transB == rocsparse_operation_none) ? (B + ldb * j) : (B + j)) : ((transB == rocsparse_operation_none) ? 
(B + j) : (B + ldb * j)); T sum = static_cast(0); for(J k = 0; k < K; ++k) { sum += x[incx * k] * y[incy * k]; } csr_val_C[at] = csr_val_C[at] * b + a * sum; } } } /* * =========================================================================== * precond SPARSE * =========================================================================== */ template void host_bsric0(rocsparse_direction direction, rocsparse_int Mb, rocsparse_int block_dim, const std::vector& bsr_row_ptr, const std::vector& bsr_col_ind, std::vector& bsr_val, rocsparse_index_base base, rocsparse_int* struct_pivot, rocsparse_int* numeric_pivot) { rocsparse_int M = Mb * block_dim; // Initialize pivot *struct_pivot = -1; *numeric_pivot = -1; // pointer of upper part of each row std::vector diag_block_offset(Mb); std::vector diag_offset(M, -1); std::vector nnz_entries(M, -1); #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int i = 0; i < Mb; i++) { rocsparse_int row_begin = bsr_row_ptr[i] - base; rocsparse_int row_end = bsr_row_ptr[i + 1] - base; for(rocsparse_int j = row_begin; j < row_end; j++) { if(bsr_col_ind[j] - base == i) { diag_block_offset[i] = j; break; } } } for(rocsparse_int i = 0; i < M; i++) { rocsparse_int local_row = i % block_dim; rocsparse_int row_begin = bsr_row_ptr[i / block_dim] - base; rocsparse_int row_end = bsr_row_ptr[i / block_dim + 1] - base; for(rocsparse_int j = row_begin; j < row_end; j++) { rocsparse_int block_col_j = bsr_col_ind[j] - base; for(rocsparse_int k = 0; k < block_dim; k++) { if(direction == rocsparse_direction_row) { nnz_entries[block_dim * block_col_j + k] = block_dim * block_dim * j + block_dim * local_row + k; } else { nnz_entries[block_dim * block_col_j + k] = block_dim * block_dim * j + block_dim * k + local_row; } } } T sum = static_cast(0); rocsparse_int diag_val_index = -1; bool has_diag = false; bool break_outer_loop = false; for(rocsparse_int j = row_begin; j < row_end; j++) { rocsparse_int block_col_j = 
bsr_col_ind[j] - base; for(rocsparse_int k = 0; k < block_dim; k++) { rocsparse_int col_j = block_dim * block_col_j + k; // Mark diagonal and skip row if(col_j == i) { diag_val_index = block_dim * block_dim * j + block_dim * k + k; has_diag = true; break_outer_loop = true; break; } // Skip upper triangular if(col_j > i) { break_outer_loop = true; break; } T val_j = static_cast(0); if(direction == rocsparse_direction_row) { val_j = bsr_val[block_dim * block_dim * j + block_dim * local_row + k]; } else { val_j = bsr_val[block_dim * block_dim * j + block_dim * k + local_row]; } rocsparse_int local_row_j = col_j % block_dim; rocsparse_int row_begin_j = bsr_row_ptr[col_j / block_dim] - base; rocsparse_int row_end_j = diag_block_offset[col_j / block_dim]; rocsparse_int row_diag_j = diag_offset[col_j]; T local_sum = static_cast(0); T inv_diag = row_diag_j != -1 ? bsr_val[row_diag_j] : static_cast(0); // Check for numeric zero if(inv_diag == static_cast(0)) { // Numerical non-invertible block diagonal if(*numeric_pivot == -1) { *numeric_pivot = block_col_j + base; } *numeric_pivot = std::min(*numeric_pivot, block_col_j + base); inv_diag = static_cast(1); } inv_diag = static_cast(1) / inv_diag; // loop over upper offset pointer and do linear combination for nnz entry for(rocsparse_int l = row_begin_j; l < row_end_j + 1; l++) { rocsparse_int block_col_l = bsr_col_ind[l] - base; for(rocsparse_int m = 0; m < block_dim; m++) { rocsparse_int idx = nnz_entries[block_dim * block_col_l + m]; if(idx != -1 && block_dim * block_col_l + m < col_j) { if(direction == rocsparse_direction_row) { local_sum = std::fma(bsr_val[block_dim * block_dim * l + block_dim * local_row_j + m], rocsparse_conj(bsr_val[idx]), local_sum); } else { local_sum = std::fma(bsr_val[block_dim * block_dim * l + block_dim * m + local_row_j], rocsparse_conj(bsr_val[idx]), local_sum); } } } } val_j = (val_j - local_sum) * inv_diag; sum = std::fma(val_j, rocsparse_conj(val_j), sum); if(direction == 
rocsparse_direction_row) { bsr_val[block_dim * block_dim * j + block_dim * local_row + k] = val_j; } else { bsr_val[block_dim * block_dim * j + block_dim * k + local_row] = val_j; } } if(break_outer_loop) { break; } } if(!has_diag) { // Structural missing block diagonal if(*struct_pivot == -1) { *struct_pivot = i / block_dim + base; } } // Process diagonal entry if(has_diag) { T diag_entry = std::sqrt(std::abs(bsr_val[diag_val_index] - sum)); bsr_val[diag_val_index] = diag_entry; if(diag_entry == static_cast(0)) { // Numerical non-invertible block diagonal if(*numeric_pivot == -1) { *numeric_pivot = i / block_dim + base; } *numeric_pivot = std::min(*numeric_pivot, i / block_dim + base); } // Store diagonal offset diag_offset[i] = diag_val_index; } for(rocsparse_int j = row_begin; j < row_end; j++) { rocsparse_int block_col_j = bsr_col_ind[j] - base; for(rocsparse_int k = 0; k < block_dim; k++) { if(direction == rocsparse_direction_row) { nnz_entries[block_dim * block_col_j + k] = -1; } else { nnz_entries[block_dim * block_col_j + k] = -1; } } } } } template void host_bsrilu0(rocsparse_direction dir, rocsparse_int mb, const std::vector& bsr_row_ptr, const std::vector& bsr_col_ind, std::vector& bsr_val, rocsparse_int bsr_dim, rocsparse_index_base base, rocsparse_int* struct_pivot, rocsparse_int* numeric_pivot, bool boost, U boost_tol, T boost_val) { // Initialize pivots *struct_pivot = mb + 1; *numeric_pivot = mb + 1; // Temporary vector to hold diagonal offset to access diagonal BSR block std::vector diag_offset(mb); std::vector nnz_entries(mb, -1); // First diagonal block is index 0 diag_offset[0] = 0; // Loop over all BSR rows for(rocsparse_int i = 0; i < mb; ++i) { // Flag whether we have a diagonal block or not bool has_diag = false; // BSR column entry and exit point rocsparse_int row_begin = bsr_row_ptr[i] - base; rocsparse_int row_end = bsr_row_ptr[i + 1] - base; rocsparse_int j; // Set up entry points for linear combination for(j = row_begin; j < row_end; 
++j) { rocsparse_int col_j = bsr_col_ind[j] - base; nnz_entries[col_j] = j; } // Process lower diagonal BSR blocks (diagonal BSR block is excluded) for(j = row_begin; j < row_end; ++j) { // Column index of current BSR block rocsparse_int bsr_col = bsr_col_ind[j] - base; // If this is a diagonal block, set diagonal flag to true and skip // all upcoming blocks as we exceed the lower matrix part if(bsr_col == i) { has_diag = true; break; } // Skip all upper matrix blocks if(bsr_col > i) { break; } // Process all lower matrix BSR blocks // Obtain corresponding row entry and exit point that corresponds with the // current BSR column. Actually, we skip all lower matrix column indices, // therefore starting with the diagonal entry. rocsparse_int diag_j = diag_offset[bsr_col]; rocsparse_int row_end_j = bsr_row_ptr[bsr_col + 1] - base; // Loop through all rows within the BSR block for(rocsparse_int bi = 0; bi < bsr_dim; ++bi) { T diag = bsr_val[BSR_IND(diag_j, bi, bi, dir)]; // Process all rows within the BSR block for(rocsparse_int bk = 0; bk < bsr_dim; ++bk) { T val = bsr_val[BSR_IND(j, bk, bi, dir)]; // Multiplication factor bsr_val[BSR_IND(j, bk, bi, dir)] = val /= diag; // Loop through columns of bk-th row and do linear combination for(rocsparse_int bj = bi + 1; bj < bsr_dim; ++bj) { bsr_val[BSR_IND(j, bk, bj, dir)] = std::fma(-val, bsr_val[BSR_IND(diag_j, bi, bj, dir)], bsr_val[BSR_IND(j, bk, bj, dir)]); } } } // Loop over upper offset pointer and do linear combination for nnz entry for(rocsparse_int k = diag_j + 1; k < row_end_j; ++k) { rocsparse_int bsr_col_k = bsr_col_ind[k] - base; if(nnz_entries[bsr_col_k] != -1) { rocsparse_int m = nnz_entries[bsr_col_k]; // Loop through all rows within the BSR block for(rocsparse_int bi = 0; bi < bsr_dim; ++bi) { // Loop through columns of bi-th row and do linear combination for(rocsparse_int bj = 0; bj < bsr_dim; ++bj) { T sum = static_cast(0); for(rocsparse_int bk = 0; bk < bsr_dim; ++bk) { sum = std::fma(bsr_val[BSR_IND(j, 
bi, bk, dir)], bsr_val[BSR_IND(k, bk, bj, dir)], sum); } bsr_val[BSR_IND(m, bi, bj, dir)] -= sum; } } } } } // Check for structural pivot if(!has_diag) { *struct_pivot = std::min(*struct_pivot, i + base); break; } // Process diagonal if(bsr_col_ind[j] - base == i) { // Loop through all rows within the BSR block for(rocsparse_int bi = 0; bi < bsr_dim; ++bi) { T diag = bsr_val[BSR_IND(j, bi, bi, dir)]; if(boost) { diag = (boost_tol >= std::abs(diag)) ? boost_val : diag; bsr_val[BSR_IND(j, bi, bi, dir)] = diag; } else { // Check for numeric pivot if(diag == static_cast(0)) { *numeric_pivot = std::min(*numeric_pivot, bsr_col_ind[j]); continue; } } // Process all rows within the BSR block after bi-th row for(rocsparse_int bk = bi + 1; bk < bsr_dim; ++bk) { T val = bsr_val[BSR_IND(j, bk, bi, dir)]; // Multiplication factor bsr_val[BSR_IND(j, bk, bi, dir)] = val /= diag; // Loop through remaining columns of bk-th row and do linear combination for(rocsparse_int bj = bi + 1; bj < bsr_dim; ++bj) { bsr_val[BSR_IND(j, bk, bj, dir)] = std::fma(-val, bsr_val[BSR_IND(j, bi, bj, dir)], bsr_val[BSR_IND(j, bk, bj, dir)]); } } } } // Store diagonal BSR block entry point rocsparse_int row_diag = diag_offset[i] = j; // Process upper diagonal BSR blocks for(j = row_diag + 1; j < row_end; ++j) { // Loop through all rows within the BSR block for(rocsparse_int bi = 0; bi < bsr_dim; ++bi) { // Process all rows within the BSR block after bi-th row for(rocsparse_int bk = bi + 1; bk < bsr_dim; ++bk) { // Loop through columns of bk-th row and do linear combination for(rocsparse_int bj = 0; bj < bsr_dim; ++bj) { bsr_val[BSR_IND(j, bk, bj, dir)] = std::fma(-bsr_val[BSR_IND(row_diag, bk, bi, dir)], bsr_val[BSR_IND(j, bi, bj, dir)], bsr_val[BSR_IND(j, bk, bj, dir)]); } } } } // Reset entry points for(j = row_begin; j < row_end; ++j) { rocsparse_int col_j = bsr_col_ind[j] - base; nnz_entries[col_j] = -1; } } *struct_pivot = (*struct_pivot == mb + 1) ? 
-1 : *struct_pivot; *numeric_pivot = (*numeric_pivot == mb + 1) ? -1 : *numeric_pivot; } template void host_csric0(rocsparse_int M, const std::vector& csr_row_ptr, const std::vector& csr_col_ind, std::vector& csr_val, rocsparse_index_base base, rocsparse_int* struct_pivot, rocsparse_int* numeric_pivot) { // Initialize pivot *struct_pivot = -1; *numeric_pivot = -1; // pointer of upper part of each row std::vector diag_offset(M); std::vector nnz_entries(M, 0); // ai = 0 to N loop over all rows for(rocsparse_int ai = 0; ai < M; ++ai) { // ai-th row entries rocsparse_int row_begin = csr_row_ptr[ai] - base; rocsparse_int row_end = csr_row_ptr[ai + 1] - base; rocsparse_int j; // nnz position of ai-th row in val array for(j = row_begin; j < row_end; ++j) { nnz_entries[csr_col_ind[j] - base] = j; } T sum = static_cast(0); bool has_diag = false; // loop over ai-th row nnz entries for(j = row_begin; j < row_end; ++j) { rocsparse_int col_j = csr_col_ind[j] - base; T val_j = csr_val[j]; // Mark diagonal and skip row if(col_j == ai) { has_diag = true; break; } // Skip upper triangular if(col_j > ai) { break; } rocsparse_int row_begin_j = csr_row_ptr[col_j] - base; rocsparse_int row_diag_j = diag_offset[col_j]; T local_sum = static_cast(0); T inv_diag = csr_val[row_diag_j]; // Check for numeric zero if(inv_diag == static_cast(0)) { // Numerical zero diagonal *numeric_pivot = col_j + base; return; } inv_diag = static_cast(1) / inv_diag; // loop over upper offset pointer and do linear combination for nnz entry for(rocsparse_int k = row_begin_j; k < row_diag_j; ++k) { rocsparse_int col_k = csr_col_ind[k] - base; // if nnz at this position do linear combination if(nnz_entries[col_k] != 0) { rocsparse_int idx = nnz_entries[col_k]; local_sum = std::fma(csr_val[k], rocsparse_conj(csr_val[idx]), local_sum); } } val_j = (val_j - local_sum) * inv_diag; sum = std::fma(val_j, rocsparse_conj(val_j), sum); csr_val[j] = val_j; } if(!has_diag) { // Structural (and numerical) zero diagonal 
*struct_pivot = ai + base; *numeric_pivot = ai + base; return; } // Process diagonal entry T diag_entry = std::sqrt(std::abs(csr_val[j] - sum)); csr_val[j] = diag_entry; // Store diagonal offset diag_offset[ai] = j; // clear nnz entries for(j = row_begin; j < row_end; ++j) { nnz_entries[csr_col_ind[j] - base] = 0; } } } template void host_csrilu0(rocsparse_int M, const std::vector& csr_row_ptr, const std::vector& csr_col_ind, std::vector& csr_val, rocsparse_index_base base, rocsparse_int* struct_pivot, rocsparse_int* numeric_pivot, bool boost, U boost_tol, T boost_val) { // Initialize pivot *struct_pivot = -1; *numeric_pivot = -1; // pointer of upper part of each row std::vector diag_offset(M); std::vector nnz_entries(M, 0); // ai = 0 to N loop over all rows for(rocsparse_int ai = 0; ai < M; ++ai) { // ai-th row entries rocsparse_int row_begin = csr_row_ptr[ai] - base; rocsparse_int row_end = csr_row_ptr[ai + 1] - base; rocsparse_int j; // nnz position of ai-th row in val array for(j = row_begin; j < row_end; ++j) { nnz_entries[csr_col_ind[j] - base] = j; } bool has_diag = false; // loop over ai-th row nnz entries for(j = row_begin; j < row_end; ++j) { // if nnz entry is in lower matrix if(csr_col_ind[j] - base < ai) { rocsparse_int col_j = csr_col_ind[j] - base; rocsparse_int diag_j = diag_offset[col_j]; T diag_val = csr_val[diag_j]; if(boost) { diag_val = (boost_tol >= std::abs(diag_val)) ? 
boost_val : diag_val; csr_val[diag_j] = diag_val; } else { // Check for numeric pivot if(diag_val == static_cast(0)) { *numeric_pivot = col_j + base; return; } } // multiplication factor csr_val[j] = csr_val[j] / diag_val; // loop over upper offset pointer and do linear combination for nnz entry for(rocsparse_int k = diag_j + 1; k < csr_row_ptr[col_j + 1] - base; ++k) { // if nnz at this position do linear combination if(nnz_entries[csr_col_ind[k] - base] != 0) { rocsparse_int idx = nnz_entries[csr_col_ind[k] - base]; csr_val[idx] = std::fma(-csr_val[j], csr_val[k], csr_val[idx]); } } } else if(csr_col_ind[j] - base == ai) { has_diag = true; break; } else { break; } } if(!has_diag) { // Structural (and numerical) zero diagonal *struct_pivot = ai + base; *numeric_pivot = ai + base; return; } // set diagonal pointer to diagonal element diag_offset[ai] = j; // clear nnz entries for(j = row_begin; j < row_end; ++j) { nnz_entries[csr_col_ind[j] - base] = 0; } } } // Parallel Cyclic reduction based on paper "Fast Tridiagonal Solvers on the GPU" by Yao Zhang template void host_gtsv_no_pivot(rocsparse_int m, rocsparse_int n, const std::vector& dl, const std::vector& d, const std::vector& du, std::vector& B, rocsparse_int ldb) { // // Compute BLOCKSIZE as the lowest power of 2 greater or equal than m, // and compute the exponent 'iter' of (BLOCKSIZE / 2). // Note: if m = 1 then the calculation below leads to iter = -1 which is, in this algorithm, as acceptable as log2(m / 2) = -Inf. 
// rocsparse_int iter; size_t BLOCKSIZE; for(iter = 0, BLOCKSIZE = 1; BLOCKSIZE < m; BLOCKSIZE <<= 1, ++iter) ; --iter; for(rocsparse_int col = 0; col < n; col++) { rocsparse_int stride = 1; std::vector sa(BLOCKSIZE, static_cast(0)); std::vector sb(BLOCKSIZE, static_cast(0)); std::vector sc(BLOCKSIZE, static_cast(0)); std::vector srhs(BLOCKSIZE, static_cast(0)); std::vector a(BLOCKSIZE, static_cast(0)); std::vector b(BLOCKSIZE, static_cast(0)); std::vector c(BLOCKSIZE, static_cast(0)); std::vector rhs(BLOCKSIZE, static_cast(0)); std::vector x(BLOCKSIZE, static_cast(0)); for(rocsparse_int i = 0; i < m; i++) { a[i] = dl[i]; b[i] = d[i]; c[i] = du[i]; rhs[i] = B[ldb * col + i]; } for(rocsparse_int j = 0; j < iter; j++) { for(rocsparse_int tid = 0; tid < BLOCKSIZE; tid++) { rocsparse_int right = tid + stride; if(right >= m) right = m - 1; rocsparse_int left = tid - stride; if(left < 0) left = 0; T k1 = a[tid] / b[left]; T k2 = c[tid] / b[right]; T tb = b[tid] - c[left] * k1 - a[right] * k2; T trhs = rhs[tid] - rhs[left] * k1 - rhs[right] * k2; T ta = -a[left] * k1; T tc = -c[right] * k2; sb[tid] = tb; srhs[tid] = trhs; sa[tid] = ta; sc[tid] = tc; } for(rocsparse_int tid = 0; tid < BLOCKSIZE; tid++) { a[tid] = sa[tid]; b[tid] = sb[tid]; c[tid] = sc[tid]; rhs[tid] = srhs[tid]; } stride *= 2; } for(rocsparse_int tid = 0; tid < BLOCKSIZE; tid++) { if(tid < BLOCKSIZE / 2) { rocsparse_int i = tid; rocsparse_int j = tid + stride; if(j < m) { // Solve 2x2 systems T det = b[j] * b[i] - c[i] * a[j]; x[i] = (b[j] * rhs[i] - c[i] * rhs[j]) / det; x[j] = (rhs[j] * b[i] - rhs[i] * a[j]) / det; } else { // Solve 1x1 systems x[i] = rhs[i] / b[i]; } } } for(rocsparse_int i = 0; i < m; i++) { B[ldb * col + i] = x[i]; } } } // Parallel Cyclic reduction based on paper "Fast Tridiagonal Solvers on the GPU" by Yao Zhang template void host_gtsv_no_pivot_strided_batch(rocsparse_int m, const std::vector& dl, const std::vector& d, const std::vector& du, std::vector& x, rocsparse_int 
batch_count, rocsparse_int batch_stride) { // // Compute BLOCKSIZE as the lowest power of 2 greater or equal than m, // and compute the exponent 'iter' of (BLOCKSIZE / 2). // Note: if m = 1 then the calculation below leads to iter = -1 which is, in this algorithm, as acceptable as log2(m / 2) = -Inf. // rocsparse_int iter; size_t BLOCKSIZE; for(iter = 0, BLOCKSIZE = 1; BLOCKSIZE < m; BLOCKSIZE <<= 1, ++iter) ; --iter; for(rocsparse_int col = 0; col < batch_count; col++) { rocsparse_int stride = 1; std::vector sa(BLOCKSIZE, static_cast(0)); std::vector sb(BLOCKSIZE, static_cast(0)); std::vector sc(BLOCKSIZE, static_cast(0)); std::vector srhs(BLOCKSIZE, static_cast(0)); std::vector a(BLOCKSIZE, static_cast(0)); std::vector b(BLOCKSIZE, static_cast(0)); std::vector c(BLOCKSIZE, static_cast(0)); std::vector rhs(BLOCKSIZE, static_cast(0)); std::vector y(BLOCKSIZE, static_cast(0)); for(rocsparse_int i = 0; i < m; i++) { a[i] = dl[batch_stride * col + i]; b[i] = d[batch_stride * col + i]; c[i] = du[batch_stride * col + i]; rhs[i] = x[batch_stride * col + i]; } for(rocsparse_int j = 0; j < iter; j++) { for(rocsparse_int tid = 0; tid < BLOCKSIZE; tid++) { rocsparse_int right = tid + stride; if(right >= m) right = m - 1; rocsparse_int left = tid - stride; if(left < 0) left = 0; T k1 = a[tid] / b[left]; T k2 = c[tid] / b[right]; T tb = b[tid] - c[left] * k1 - a[right] * k2; T trhs = rhs[tid] - rhs[left] * k1 - rhs[right] * k2; T ta = -a[left] * k1; T tc = -c[right] * k2; sb[tid] = tb; srhs[tid] = trhs; sa[tid] = ta; sc[tid] = tc; } for(rocsparse_int tid = 0; tid < BLOCKSIZE; tid++) { a[tid] = sa[tid]; b[tid] = sb[tid]; c[tid] = sc[tid]; rhs[tid] = srhs[tid]; } stride *= 2; } for(rocsparse_int tid = 0; tid < BLOCKSIZE; tid++) { if(tid < BLOCKSIZE / 2) { rocsparse_int i = tid; rocsparse_int j = tid + stride; if(j < m) { // Solve 2x2 systems T det = b[j] * b[i] - c[i] * a[j]; y[i] = (b[j] * rhs[i] - c[i] * rhs[j]) / det; y[j] = (rhs[j] * b[i] - rhs[i] * a[j]) / det; } else { // 
Solve 1x1 systems y[i] = rhs[i] / b[i]; } } } for(rocsparse_int i = 0; i < m; i++) { x[batch_stride * col + i] = y[i]; } } } template void host_gtsv_interleaved_batch_thomas(rocsparse_int m, const T* dl, const T* d, const T* du, T* x, rocsparse_int batch_count, rocsparse_int batch_stride) { std::vector c1(m * batch_count, 0); std::vector x1(m * batch_count, 0); // Forward elimination #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int j = 0; j < batch_count; j++) { c1[j] = du[j] / d[j]; x1[j] = x[j] / d[j]; } for(rocsparse_int i = 1; i < m; i++) { #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int j = 0; j < batch_count; j++) { rocsparse_int index = batch_count * i + j; rocsparse_int minus = batch_count * (i - 1) + j; T tdu = du[batch_stride * i + j]; T td = d[batch_stride * i + j]; T tdl = dl[batch_stride * i + j]; T tx = x[batch_stride * i + j]; c1[index] = tdu / (td - c1[minus] * tdl); x1[index] = (tx - x1[minus] * tdl) / (td - c1[minus] * tdl); } } // backward substitution #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int j = 0; j < batch_count; j++) { x[batch_stride * (m - 1) + j] = x1[batch_count * (m - 1) + j]; } for(rocsparse_int i = m - 2; i >= 0; i--) { #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int j = 0; j < batch_count; j++) { rocsparse_int index = batch_count * i + j; x[batch_stride * i + j] = x1[index] - c1[index] * x[batch_stride * (i + 1) + j]; } } } template void host_gtsv_interleaved_batch_lu(rocsparse_int m, const T* dl, const T* d, const T* du, T* x, rocsparse_int batch_count, rocsparse_int batch_stride) { std::vector l(m * batch_count, 0); std::vector u0(m * batch_count, 0); std::vector u1(m * batch_count, 0); std::vector u2(m * batch_count, 0); std::vector p(m * batch_count, 0); for(rocsparse_int i = 0; i < m; i++) { #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif 
for(rocsparse_int j = 0; j < batch_count; j++) { l[batch_count * i + j] = dl[batch_stride * i + j]; u0[batch_count * i + j] = d[batch_stride * i + j]; u1[batch_count * i + j] = du[batch_stride * i + j]; } } // LU decomposition for(rocsparse_int i = 0; i < m - 1; i++) { #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int j = 0; j < batch_count; j++) { T ak_1 = l[batch_count * (i + 1) + j]; T bk = u0[batch_count * i + j]; if(std::abs(bk) < std::abs(ak_1)) { T bk_1 = u0[batch_count * (i + 1) + j]; T ck = u1[batch_count * i + j]; T ck_1 = u1[batch_count * (i + 1) + j]; T dk = u2[batch_count * i + j]; u0[batch_count * i + j] = ak_1; u1[batch_count * i + j] = bk_1; u2[batch_count * i + j] = ck_1; u0[batch_count * (i + 1) + j] = ck; u1[batch_count * (i + 1) + j] = dk; rocsparse_int pk = p[batch_count * i + j]; p[batch_count * i + j] = i + 1; p[batch_count * (i + 1) + j] = pk; T xk = x[batch_stride * i + j]; x[batch_stride * i + j] = x[batch_stride * (i + 1) + j]; x[batch_stride * (i + 1) + j] = xk; T lk_1 = bk / ak_1; l[batch_count * (i + 1) + j] = lk_1; u0[batch_count * (i + 1) + j] = u0[batch_count * (i + 1) + j] - lk_1 * u1[batch_count * i + j]; u1[batch_count * (i + 1) + j] = u1[batch_count * (i + 1) + j] - lk_1 * u2[batch_count * i + j]; } else { p[batch_count * (i + 1) + j] = i + 1; T lk_1 = ak_1 / bk; l[batch_count * (i + 1) + j] = lk_1; u0[batch_count * (i + 1) + j] = u0[batch_count * (i + 1) + j] - lk_1 * u1[batch_count * i + j]; u1[batch_count * (i + 1) + j] = u1[batch_count * (i + 1) + j] - lk_1 * u2[batch_count * i + j]; } } } // Forward elimination (L * x_new = x_old) std::vector start(batch_count, 0); for(rocsparse_int i = 1; i < m; i++) { #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int j = 0; j < batch_count; j++) { if(p[batch_count * i + j] <= i) // no pivoting occured, sum up result { T temp = static_cast(0); for(rocsparse_int s = start[j]; s < i; s++) { temp = temp - 
l[batch_count * (s + 1) + j] * x[batch_stride * s + j]; } x[batch_stride * i + j] = x[batch_stride * i + j] + temp; start[j] += i - start[j]; } } } // backward substitution (U * x_newest = x_new) #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int j = 0; j < batch_count; j++) { x[batch_stride * (m - 1) + j] = x[batch_stride * (m - 1) + j] / u0[batch_count * (m - 1) + j]; x[batch_stride * (m - 2) + j] = (x[batch_stride * (m - 2) + j] - u1[batch_count * (m - 2) + j] * x[batch_stride * (m - 1) + j]) / u0[batch_count * (m - 2) + j]; } for(rocsparse_int i = m - 3; i >= 0; i--) { #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int j = 0; j < batch_count; j++) { x[batch_stride * i + j] = (x[batch_stride * i + j] - u1[batch_count * i + j] * x[batch_stride * (i + 1) + j] - u2[batch_count * i + j] * x[batch_stride * (i + 2) + j]) / u0[batch_count * i + j]; } } } template void host_gtsv_interleaved_batch_qr(rocsparse_int m, const T* dl, const T* d, const T* du, T* x, rocsparse_int batch_count, rocsparse_int batch_stride) { std::vector r0(m * batch_count, 0); std::vector r1(m * batch_count, 0); std::vector r2(m * batch_count, 0); for(rocsparse_int i = 0; i < m; i++) { #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int j = 0; j < batch_count; j++) { r0[batch_count * i + j] = d[batch_stride * i + j]; r1[batch_count * i + j] = du[batch_stride * i + j]; } } // Reduce A = Q*R where Q is orthonormal and R is upper triangular // This means when solving A * x = b // => Q * R * x = b // => Q' * Q * R * x = Q' * b // => R * x = Q' * b // Because A is tri-diagonal, we use Givens rotations // Note on notation used here. 
I consider the A matrix to have form: // A = b0 c0 0 0 0 // a1 b1 c1 0 0 // 0 a2 b2 c2 0 // 0 0 a3 b3 c3 // 0 0 0 a4 b4 for(rocsparse_int i = 0; i < m - 1; i++) { #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int j = 0; j < batch_count; j++) { T ak_1 = dl[batch_stride * (i + 1) + j]; T bk = r0[batch_count * i + j]; T bk_1 = r0[batch_count * (i + 1) + j]; T ck = r1[batch_count * i + j]; T ck_1 = r1[batch_count * (i + 1) + j]; T radius = std::sqrt(std::abs(bk * rocsparse_conj(bk) + ak_1 * rocsparse_conj(ak_1))); // Apply Givens rotation // | cos sin | |bk ck 0 | // |-sin cos | |ak_1 bk_1 ck_1| T cos_theta = rocsparse_conj(bk) / radius; T sin_theta = rocsparse_conj(ak_1) / radius; r0[batch_count * i + j] = std::fma(bk, cos_theta, ak_1 * sin_theta); r0[batch_count * (i + 1) + j] = std::fma(-ck, rocsparse_conj(sin_theta), bk_1 * rocsparse_conj(cos_theta)); r1[batch_count * i + j] = std::fma(ck, cos_theta, bk_1 * sin_theta); r1[batch_count * (i + 1) + j] = ck_1 * rocsparse_conj(cos_theta); r2[batch_count * i + j] = ck_1 * sin_theta; // Apply Givens rotation to rhs vector // | cos sin | |xk | // |-sin cos | |xk_1| T xk = x[batch_stride * i + j]; T xk_1 = x[batch_stride * (i + 1) + j]; x[batch_stride * i + j] = std::fma(xk, cos_theta, xk_1 * sin_theta); x[batch_stride * (i + 1) + j] = std::fma(-xk, rocsparse_conj(sin_theta), xk_1 * rocsparse_conj(cos_theta)); } } // Backward substitution on upper triangular R * x = x #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int j = 0; j < batch_count; j++) { x[batch_stride * (m - 1) + j] = x[batch_stride * (m - 1) + j] / r0[batch_count * (m - 1) + j]; x[batch_stride * (m - 2) + j] = (x[batch_stride * (m - 2) + j] - r1[batch_count * (m - 2) + j] * x[batch_stride * (m - 1) + j]) / r0[batch_count * (m - 2) + j]; } for(rocsparse_int i = m - 3; i >= 0; i--) { #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int j = 0; j < 
batch_count; j++) { x[batch_stride * i + j] = (x[batch_stride * i + j] - r1[batch_count * i + j] * x[batch_stride * (i + 1) + j] - r2[batch_count * i + j] * x[batch_stride * (i + 2) + j]) / r0[batch_count * i + j]; } } } template void host_gtsv_interleaved_batch(rocsparse_gtsv_interleaved_alg algo, rocsparse_int m, const T* dl, const T* d, const T* du, T* x, rocsparse_int batch_count, rocsparse_int batch_stride) { switch(algo) { case rocsparse_gtsv_interleaved_alg_thomas: { host_gtsv_interleaved_batch_thomas(m, dl, d, du, x, batch_count, batch_stride); break; } case rocsparse_gtsv_interleaved_alg_lu: { host_gtsv_interleaved_batch_lu(m, dl, d, du, x, batch_count, batch_stride); break; } case rocsparse_gtsv_interleaved_alg_default: case rocsparse_gtsv_interleaved_alg_qr: { host_gtsv_interleaved_batch_qr(m, dl, d, du, x, batch_count, batch_stride); break; } } } template void host_gpsv_interleaved_batch_qr(rocsparse_int m, T* ds, T* dl, T* d, T* du, T* dw, T* x, rocsparse_int batch_count, rocsparse_int batch_stride) { std::vector r3(m * batch_count, 0); std::vector r4(m * batch_count, 0); // Reduce A = Q*R where Q is orthonormal and R is upper triangular // This means when solving A * x = b // => Q * R * x = b // => Q' * Q * R * x = Q' * b // => R * x = Q' * b // Because A is penta-diagonal, we use Givens rotations // Note on notation used here. 
I consider the A matrix to have form: // A = d0 u0 w0 0 0 // l1 d1 u1 w1 0 // s2 l2 d2 u2 w2 // 0 s3 l3 d3 u3 // 0 0 s4 l4 d4 for(rocsparse_int i = 0; i < m - 2; i++) { #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int j = 0; j < batch_count; j++) { // For penta diagonal matrices, need to apply two givens rotations to remove lower and lower - 1 entries T radius = static_cast(0); T cos_theta = static_cast(0); T sin_theta = static_cast(0); // Apply first Givens rotation // | cos sin | |lk_1 dk_1 uk_1 wk_1 0 | // |-sin cos | |sk_2 lk_2 dk_2 uk_2 wk_2| T sk_2 = ds[batch_stride * (i + 2) + j]; T lk_1 = dl[batch_stride * (i + 1) + j]; T lk_2 = dl[batch_stride * (i + 2) + j]; T dk_1 = d[batch_stride * (i + 1) + j]; T dk_2 = d[batch_stride * (i + 2) + j]; T uk_1 = du[batch_stride * (i + 1) + j]; T uk_2 = du[batch_stride * (i + 2) + j]; T wk_1 = dw[batch_stride * (i + 1) + j]; T wk_2 = dw[batch_stride * (i + 2) + j]; radius = std::sqrt( std::abs(std::fma(lk_1, rocsparse_conj(lk_1), sk_2 * rocsparse_conj(sk_2)))); cos_theta = rocsparse_conj(lk_1) / radius; sin_theta = rocsparse_conj(sk_2) / radius; T dlk_1_new = std::fma(lk_1, cos_theta, sk_2 * sin_theta); T dk_1_new = std::fma(dk_1, cos_theta, lk_2 * sin_theta); T duk_1_new = std::fma(uk_1, cos_theta, dk_2 * sin_theta); T dwk_1_new = std::fma(wk_1, cos_theta, uk_2 * sin_theta); dl[batch_stride * (i + 1) + j] = dlk_1_new; dl[batch_stride * (i + 2) + j] = std::fma(-dk_1, rocsparse_conj(sin_theta), lk_2 * rocsparse_conj(cos_theta)); d[batch_stride * (i + 1) + j] = dk_1_new; d[batch_stride * (i + 2) + j] = std::fma(-uk_1, rocsparse_conj(sin_theta), dk_2 * rocsparse_conj(cos_theta)); du[batch_stride * (i + 1) + j] = duk_1_new; du[batch_stride * (i + 2) + j] = std::fma(-wk_1, rocsparse_conj(sin_theta), uk_2 * rocsparse_conj(cos_theta)); dw[batch_stride * (i + 1) + j] = dwk_1_new; dw[batch_stride * (i + 2) + j] = wk_2 * rocsparse_conj(cos_theta); r3[batch_count * (i + 1) + j] = wk_2 * 
sin_theta; // Apply first Givens rotation to rhs vector // | cos sin | |xk_1| // |-sin cos | |xk_2| T xk_1 = x[batch_stride * (i + 1) + j]; T xk_2 = x[batch_stride * (i + 2) + j]; x[batch_stride * (i + 1) + j] = std::fma(xk_1, cos_theta, xk_2 * sin_theta); x[batch_stride * (i + 2) + j] = std::fma(-xk_1, rocsparse_conj(sin_theta), xk_2 * rocsparse_conj(cos_theta)); // Apply second Givens rotation // | cos sin | |dk uk wk rk 0 | // |-sin cos | |lk_1 dk_1 uk_1 wk_1 rk_1| lk_1 = dlk_1_new; T dk = d[batch_stride * i + j]; dk_1 = dk_1_new; T uk = du[batch_stride * i + j]; uk_1 = duk_1_new; T wk = dw[batch_stride * i + j]; wk_1 = dwk_1_new; T rk = r3[batch_count * i + j]; T rk_1 = r3[batch_count * (i + 1) + j]; radius = std::sqrt( std::abs(std::fma(dk, rocsparse_conj(dk), lk_1 * rocsparse_conj(lk_1)))); cos_theta = rocsparse_conj(dk) / radius; sin_theta = rocsparse_conj(lk_1) / radius; d[batch_stride * i + j] = std::fma(dk, cos_theta, lk_1 * sin_theta); d[batch_stride * (i + 1) + j] = std::fma(-uk, rocsparse_conj(sin_theta), dk_1 * rocsparse_conj(cos_theta)); du[batch_stride * i + j] = std::fma(uk, cos_theta, dk_1 * sin_theta); du[batch_stride * (i + 1) + j] = std::fma(-wk, rocsparse_conj(sin_theta), uk_1 * rocsparse_conj(cos_theta)); dw[batch_stride * i + j] = std::fma(wk, cos_theta, uk_1 * sin_theta); dw[batch_stride * (i + 1) + j] = std::fma(-rk, rocsparse_conj(sin_theta), wk_1 * rocsparse_conj(cos_theta)); r3[batch_count * i + j] = std::fma(rk, cos_theta, wk_1 * sin_theta); r3[batch_count * (i + 1) + j] = rk_1 * rocsparse_conj(cos_theta); r4[batch_count * i + j] = rk_1 * sin_theta; // Apply second Givens rotation to rhs vector // | cos sin | |xk | // |-sin cos | |xk_1| T xk = x[batch_stride * i + j]; xk_1 = x[batch_stride * (i + 1) + j]; x[batch_stride * i + j] = std::fma(xk, cos_theta, xk_1 * sin_theta); x[batch_stride * (i + 1) + j] = std::fma(-xk, rocsparse_conj(sin_theta), xk_1 * rocsparse_conj(cos_theta)); } } // Apply last givens rotation #ifdef _OPENMP #pragma 
omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int j = 0; j < batch_count; j++) { // Apply last Givens rotation // | cos sin | |dk uk wk rk 0 | // |-sin cos | |lk_1 dk_1 uk_1 wk_1 rk_1| T lk_1 = dl[batch_stride * (m - 1) + j]; T dk = d[batch_stride * (m - 2) + j]; T dk_1 = d[batch_stride * (m - 1) + j]; T uk = du[batch_stride * (m - 2) + j]; T uk_1 = du[batch_stride * (m - 1) + j]; T wk = dw[batch_stride * (m - 2) + j]; T wk_1 = dw[batch_stride * (m - 1) + j]; T rk = r3[batch_count * (m - 2) + j]; T rk_1 = r3[batch_count * (m - 1) + j]; T radius = std::sqrt(std::abs(std::fma(dk, rocsparse_conj(dk), lk_1 * rocsparse_conj(lk_1)))); T cos_theta = rocsparse_conj(dk) / radius; T sin_theta = rocsparse_conj(lk_1) / radius; d[batch_stride * (m - 2) + j] = std::fma(dk, cos_theta, lk_1 * sin_theta); d[batch_stride * (m - 1) + j] = std::fma(-uk, rocsparse_conj(sin_theta), dk_1 * rocsparse_conj(cos_theta)); du[batch_stride * (m - 2) + j] = std::fma(uk, cos_theta, dk_1 * sin_theta); du[batch_stride * (m - 1) + j] = std::fma(-wk, rocsparse_conj(sin_theta), uk_1 * rocsparse_conj(cos_theta)); dw[batch_stride * (m - 2) + j] = std::fma(wk, cos_theta, uk_1 * sin_theta); dw[batch_stride * (m - 1) + j] = std::fma(-rk, rocsparse_conj(sin_theta), wk_1 * rocsparse_conj(cos_theta)); r3[batch_count * (m - 2) + j] = std::fma(rk, cos_theta, wk_1 * sin_theta); r3[batch_count * (m - 1) + j] = rk_1 * rocsparse_conj(cos_theta); r4[batch_count * (m - 2) + j] = rk_1 * sin_theta; // Apply last Givens rotation to rhs vector // | cos sin | |xk | // |-sin cos | |xk_1| T xk = x[batch_stride * (m - 2) + j]; T xk_1 = x[batch_stride * (m - 1) + j]; x[batch_stride * (m - 2) + j] = std::fma(xk, cos_theta, xk_1 * sin_theta); x[batch_stride * (m - 1) + j] = std::fma(-xk, rocsparse_conj(sin_theta), xk_1 * rocsparse_conj(cos_theta)); } // Backward substitution on upper triangular R * x = x #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int j = 0; j < 
batch_count; j++) { x[batch_stride * (m - 1) + j] = x[batch_stride * (m - 1) + j] / d[batch_stride * (m - 1) + j]; x[batch_stride * (m - 2) + j] = (x[batch_stride * (m - 2) + j] - du[batch_stride * (m - 2) + j] * x[batch_stride * (m - 1) + j]) / d[batch_stride * (m - 2) + j]; x[batch_stride * (m - 3) + j] = (x[batch_stride * (m - 3) + j] - du[batch_stride * (m - 3) + j] * x[batch_stride * (m - 2) + j] - dw[batch_stride * (m - 3) + j] * x[batch_stride * (m - 1) + j]) / d[batch_stride * (m - 3) + j]; x[batch_stride * (m - 4) + j] = (x[batch_stride * (m - 4) + j] - du[batch_stride * (m - 4) + j] * x[batch_stride * (m - 3) + j] - dw[batch_stride * (m - 4) + j] * x[batch_stride * (m - 2) + j] - r3[batch_count * (m - 4) + j] * x[batch_stride * (m - 1) + j]) / d[batch_stride * (m - 4) + j]; } for(rocsparse_int i = m - 5; i >= 0; i--) { #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int j = 0; j < batch_count; j++) { x[batch_stride * i + j] = (x[batch_stride * i + j] - du[batch_stride * i + j] * x[batch_stride * (i + 1) + j] - dw[batch_stride * i + j] * x[batch_stride * (i + 2) + j] - r3[batch_count * i + j] * x[batch_stride * (i + 3) + j] - r4[batch_count * i + j] * x[batch_stride * (i + 4) + j]) / d[batch_stride * i + j]; } } } template void host_gpsv_interleaved_batch(rocsparse_gpsv_interleaved_alg algo, rocsparse_int m, T* ds, T* dl, T* d, T* du, T* dw, T* x, rocsparse_int batch_count, rocsparse_int batch_stride) { switch(algo) { case rocsparse_gpsv_interleaved_alg_default: case rocsparse_gpsv_interleaved_alg_qr: { host_gpsv_interleaved_batch_qr(m, ds, dl, d, du, dw, x, batch_count, batch_stride); break; } } } /* * =========================================================================== * conversion SPARSE * =========================================================================== */ template rocsparse_status host_nnz(rocsparse_direction dirA, rocsparse_int m, rocsparse_int n, const T* A, rocsparse_int lda, rocsparse_int* 
nnz_per_row_columns, rocsparse_int* nnz_total_dev_host_ptr) { rocsparse_int mn = (dirA == rocsparse_direction_row) ? m : n; for(rocsparse_int j = 0; j < mn; ++j) { nnz_per_row_columns[j] = 0; } for(rocsparse_int j = 0; j < n; ++j) { for(rocsparse_int i = 0; i < m; ++i) { if(A[j * lda + i] != 0) { if(dirA == rocsparse_direction_row) { nnz_per_row_columns[i] += 1; } else { nnz_per_row_columns[j] += 1; } } } } nnz_total_dev_host_ptr[0] = 0; for(rocsparse_int j = 0; j < mn; ++j) { nnz_total_dev_host_ptr[0] += nnz_per_row_columns[j]; } return rocsparse_status_success; } template void host_prune_dense2csr(rocsparse_int m, rocsparse_int n, const std::vector& A, rocsparse_int lda, rocsparse_index_base base, T threshold, rocsparse_int& nnz, std::vector& csr_val, std::vector& csr_row_ptr, std::vector& csr_col_ind) { csr_row_ptr.resize(m + 1, 0); csr_row_ptr[0] = base; #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int i = 0; i < m; i++) { for(rocsparse_int j = 0; j < n; j++) { if(std::abs(A[lda * j + i]) > threshold) { csr_row_ptr[i + 1]++; } } } for(rocsparse_int i = 1; i <= m; i++) { csr_row_ptr[i] += csr_row_ptr[i - 1]; } nnz = csr_row_ptr[m] - csr_row_ptr[0]; csr_col_ind.resize(nnz); csr_val.resize(nnz); rocsparse_int index = 0; for(rocsparse_int i = 0; i < m; i++) { for(rocsparse_int j = 0; j < n; j++) { if(std::abs(A[lda * j + i]) > threshold) { csr_val[index] = A[lda * j + i]; csr_col_ind[index] = j + base; index++; } } } } template void host_prune_dense2csr_by_percentage(rocsparse_int m, rocsparse_int n, const std::vector& A, rocsparse_int lda, rocsparse_index_base base, T percentage, rocsparse_int& nnz, std::vector& csr_val, std::vector& csr_row_ptr, std::vector& csr_col_ind) { rocsparse_int nnz_A = m * n; rocsparse_int pos = std::ceil(nnz_A * (percentage / 100)) - 1; pos = std::min(pos, nnz_A - 1); pos = std::max(pos, 0); std::vector sorted_A(m * n); for(rocsparse_int i = 0; i < n; i++) { for(rocsparse_int j = 0; j < m; j++) { 
sorted_A[m * i + j] = std::abs(A[lda * i + j]); } } std::sort(sorted_A.begin(), sorted_A.end()); T threshold = sorted_A[pos]; host_prune_dense2csr(m, n, A, lda, base, threshold, nnz, csr_val, csr_row_ptr, csr_col_ind); } template void host_dense2csx(J m, J n, rocsparse_index_base base, const T* A, I ld, rocsparse_order order, const I* nnz_per_row_columns, T* csx_val, I* csx_row_col_ptr, J* csx_col_row_ind) { static constexpr T s_zero = {}; J len = (rocsparse_direction_row == DIRA) ? m : n; *csx_row_col_ptr = base; for(J i = 0; i < len; ++i) { csx_row_col_ptr[i + 1] = nnz_per_row_columns[i] + csx_row_col_ptr[i]; } switch(DIRA) { case rocsparse_direction_column: { for(J j = 0; j < n; ++j) { for(J i = 0; i < m; ++i) { if(order == rocsparse_order_column) { if(A[j * ld + i] != s_zero) { *csx_val++ = A[j * ld + i]; *csx_col_row_ind++ = i + base; } } else { if(A[i * ld + j] != s_zero) { *csx_val++ = A[i * ld + j]; *csx_col_row_ind++ = i + base; } } } } break; } case rocsparse_direction_row: { // // Does not matter having an orthogonal traversal ... testing only. // Otherwise, we would use csx_row_ptr_A to store the shifts. 
// and once the job is done a simple memory move would reinitialize the csx_row_ptr_A to its initial state) // for(J i = 0; i < m; ++i) { for(J j = 0; j < n; ++j) { if(order == rocsparse_order_column) { if(A[j * ld + i] != s_zero) { *csx_val++ = A[j * ld + i]; *csx_col_row_ind++ = j + base; } } else { if(A[i * ld + j] != s_zero) { *csx_val++ = A[i * ld + j]; *csx_col_row_ind++ = j + base; } } } } break; } } } template void host_csx2dense(J m, J n, rocsparse_index_base base, rocsparse_order order, const T* csx_val, const I* csx_row_col_ptr, const J* csx_col_row_ind, T* A, I ld) { if(order == rocsparse_order_column) { for(J col = 0; col < n; ++col) { for(J row = 0; row < m; ++row) { A[row + ld * col] = static_cast(0); } } } else { for(J row = 0; row < m; ++row) { for(J col = 0; col < n; ++col) { A[col + ld * row] = static_cast(0); } } } if(DIRA == rocsparse_direction_column) { for(J col = 0; col < n; ++col) { I start = csx_row_col_ptr[col] - base; I end = csx_row_col_ptr[col + 1] - base; if(order == rocsparse_order_column) { for(I at = start; at < end; ++at) { A[(csx_col_row_ind[at] - base) + ld * col] = csx_val[at]; } } else { for(I at = start; at < end; ++at) { A[col + ld * (csx_col_row_ind[at] - base)] = csx_val[at]; } } } } else { for(J row = 0; row < m; ++row) { I start = csx_row_col_ptr[row] - base; I end = csx_row_col_ptr[row + 1] - base; if(order == rocsparse_order_column) { for(I at = start; at < end; ++at) { A[(csx_col_row_ind[at] - base) * ld + row] = csx_val[at]; } } else { for(I at = start; at < end; ++at) { A[row * ld + (csx_col_row_ind[at] - base)] = csx_val[at]; } } } } } template void host_dense_to_coo(I m, I n, rocsparse_index_base base, const std::vector& A, I ld, rocsparse_order order, const std::vector& nnz_per_row, std::vector& coo_val, std::vector& coo_row_ind, std::vector& coo_col_ind) { // Find number of non-zeros in dense matrix int nnz = 0; for(I i = 0; i < m; ++i) { nnz += nnz_per_row[i]; } coo_val.resize(nnz, static_cast(0)); 
coo_row_ind.resize(nnz, 0); coo_col_ind.resize(nnz, 0); // Fill COO matrix int index = 0; for(I i = 0; i < m; i++) { for(I j = 0; j < n; j++) { if(order == rocsparse_order_column) { if(A[ld * j + i] != static_cast(0)) { coo_val[index] = A[ld * j + i]; coo_row_ind[index] = i + base; coo_col_ind[index] = j + base; index++; } } else { if(A[ld * i + j] != static_cast(0)) { coo_val[index] = A[ld * i + j]; coo_row_ind[index] = i + base; coo_col_ind[index] = j + base; index++; } } } } } template void host_coo_to_dense(I m, I n, I nnz, rocsparse_index_base base, const std::vector& coo_val, const std::vector& coo_row_ind, const std::vector& coo_col_ind, std::vector& A, I ld, rocsparse_order order) { I nm = order == rocsparse_order_column ? n : m; A.resize(ld * nm); if(order == rocsparse_order_column) { for(I i = 0; i < n; i++) { for(I j = 0; j < m; j++) { A[ld * i + j] = static_cast(0); } } } else { for(I j = 0; j < m; j++) { for(I i = 0; i < n; i++) { A[ld * j + i] = static_cast(0); } } } for(I i = 0; i < nnz; i++) { I row = coo_row_ind[i] - base; I col = coo_col_ind[i] - base; T val = coo_val[i]; if(order == rocsparse_order_column) { A[ld * col + row] = val; } else { A[ld * row + col] = val; } } } template void host_csr_to_csc(J M, J N, I nnz, const I* csr_row_ptr, const J* csr_col_ind, const T* csr_val, std::vector& csc_row_ind, std::vector& csc_col_ptr, std::vector& csc_val, rocsparse_action action, rocsparse_index_base base) { csc_row_ind.resize(nnz); csc_col_ptr.resize(N + 1, 0); csc_val.resize(nnz); // Determine nnz per column for(I i = 0; i < nnz; ++i) { ++csc_col_ptr[csr_col_ind[i] + 1 - base]; } // Scan for(J i = 0; i < N; ++i) { csc_col_ptr[i + 1] += csc_col_ptr[i]; } // Fill row indices and values for(J i = 0; i < M; ++i) { I row_begin = csr_row_ptr[i] - base; I row_end = csr_row_ptr[i + 1] - base; for(I j = row_begin; j < row_end; ++j) { J col = csr_col_ind[j] - base; I idx = csc_col_ptr[col]; csc_row_ind[idx] = i + base; csc_val[idx] = csr_val[j]; 
++csc_col_ptr[col]; } } // Shift column pointer array for(J i = N; i > 0; --i) { csc_col_ptr[i] = csc_col_ptr[i - 1] + base; } csc_col_ptr[0] = base; } template void host_bsr_to_csr(rocsparse_direction direction, rocsparse_int mb, rocsparse_int nb, rocsparse_int nnzb, const std::vector& bsr_val, const std::vector& bsr_row_ptr, const std::vector& bsr_col_ind, rocsparse_int block_dim, rocsparse_index_base bsr_base, std::vector& csr_val, std::vector& csr_row_ptr, std::vector& csr_col_ind, rocsparse_index_base csr_base) { return host_gebsr_to_csr(direction, mb, nb, nnzb, bsr_val, bsr_row_ptr, bsr_col_ind, block_dim, block_dim, bsr_base, csr_val, csr_row_ptr, csr_col_ind, csr_base); } template void host_csr_to_bsr(rocsparse_direction direction, rocsparse_int m, rocsparse_int n, rocsparse_int nnz, const std::vector& csr_val, const std::vector& csr_row_ptr, const std::vector& csr_col_ind, rocsparse_int block_dim, rocsparse_index_base csr_base, std::vector& bsr_val, std::vector& bsr_row_ptr, std::vector& bsr_col_ind, rocsparse_index_base bsr_base) { return host_csr_to_gebsr(direction, m, n, nnz, csr_val, csr_row_ptr, csr_col_ind, block_dim, block_dim, csr_base, bsr_val, bsr_row_ptr, bsr_col_ind, bsr_base); } template void host_csr_to_gebsr(rocsparse_direction direction, rocsparse_int m, rocsparse_int n, rocsparse_int nnz, const std::vector& csr_val, const std::vector& csr_row_ptr, const std::vector& csr_col_ind, rocsparse_int row_block_dim, rocsparse_int col_block_dim, rocsparse_index_base csr_base, std::vector& bsr_val, std::vector& bsr_row_ptr, std::vector& bsr_col_ind, rocsparse_index_base bsr_base) { rocsparse_int mb = (m + row_block_dim - 1) / row_block_dim; bsr_row_ptr.resize(mb + 1, 0); std::vector temp(nnz); #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int i = 0; i < nnz; i++) { temp[i] = (csr_col_ind[i] - csr_base) / col_block_dim; } #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int i = 
0; i < mb; i++) { rocsparse_int frow = row_block_dim * i; rocsparse_int lrow = row_block_dim * (i + 1); if(lrow > m) { lrow = m; } rocsparse_int start = csr_row_ptr[frow] - csr_base; rocsparse_int end = csr_row_ptr[lrow] - csr_base; std::sort(temp.begin() + start, temp.begin() + end); } #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int i = 0; i < mb; i++) { rocsparse_int frow = row_block_dim * i; rocsparse_int lrow = row_block_dim * (i + 1); if(lrow > m) { lrow = m; } rocsparse_int start = csr_row_ptr[frow] - csr_base; rocsparse_int end = csr_row_ptr[lrow] - csr_base; rocsparse_int col = -1; rocsparse_int count = 0; for(rocsparse_int j = start; j < end; j++) { if(temp[j] > col) { col = temp[j]; temp[j] = -1; temp[start + count] = col; count++; } else { temp[j] = -1; } } bsr_row_ptr[i + 1] = count; } // fill GEBSR row pointer array bsr_row_ptr[0] = bsr_base; for(rocsparse_int i = 0; i < mb; i++) { bsr_row_ptr[i + 1] += bsr_row_ptr[i]; } rocsparse_int nnzb = bsr_row_ptr[mb] - bsr_row_ptr[0]; bsr_col_ind.resize(nnzb); bsr_val.resize(nnzb * row_block_dim * col_block_dim, 0); // fill GEBSR col indices array { rocsparse_int index = 0; for(rocsparse_int i = 0; i < nnz; i++) { if(temp[i] != -1) { bsr_col_ind[index] = temp[i] + bsr_base; index++; } } } // fill GEBSR values array #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int i = 0; i < m; i++) { rocsparse_int start = csr_row_ptr[i] - csr_base; rocsparse_int end = csr_row_ptr[i + 1] - csr_base; rocsparse_int bstart = bsr_row_ptr[i / row_block_dim] - bsr_base; rocsparse_int bend = bsr_row_ptr[i / row_block_dim + 1] - bsr_base; rocsparse_int local_row = i % row_block_dim; for(rocsparse_int j = start; j < end; j++) { rocsparse_int col = csr_col_ind[j] - csr_base; rocsparse_int local_col = col % col_block_dim; rocsparse_int index = 0; for(rocsparse_int k = bstart; k < bend; k++) { if(bsr_col_ind[k] - bsr_base == col / col_block_dim) { index = k; 
bstart = k; break; } } if(direction == rocsparse_direction_row) { bsr_val[row_block_dim * col_block_dim * index + col_block_dim * local_row + local_col] = csr_val[j]; } else { bsr_val[row_block_dim * col_block_dim * index + row_block_dim * local_col + local_row] = csr_val[j]; } } } } template void host_gebsr_to_gebsc(rocsparse_int Mb, rocsparse_int Nb, rocsparse_int nnzb, const std::vector& bsr_row_ptr, const std::vector& bsr_col_ind, const std::vector& bsr_val, rocsparse_int row_block_dim, rocsparse_int col_block_dim, std::vector& bsc_row_ind, std::vector& bsc_col_ptr, std::vector& bsc_val, rocsparse_action action, rocsparse_index_base base) { bsc_row_ind.resize(nnzb); bsc_col_ptr.resize(Nb + 1, 0); bsc_val.resize(nnzb * row_block_dim * col_block_dim); const rocsparse_int block_shift = row_block_dim * col_block_dim; // // Determine nnz per column // for(rocsparse_int i = 0; i < nnzb; ++i) { ++bsc_col_ptr[bsr_col_ind[i] + 1 - base]; } // Scan for(rocsparse_int i = 0; i < Nb; ++i) { bsc_col_ptr[i + 1] += bsc_col_ptr[i]; } // Fill row indices and values for(rocsparse_int i = 0; i < Mb; ++i) { const rocsparse_int row_begin = bsr_row_ptr[i] - base; const rocsparse_int row_end = bsr_row_ptr[i + 1] - base; for(rocsparse_int j = row_begin; j < row_end; ++j) { const rocsparse_int col = bsr_col_ind[j] - base; const rocsparse_int idx = bsc_col_ptr[col]; bsc_row_ind[idx] = i + base; for(rocsparse_int k = 0; k < block_shift; ++k) { bsc_val[idx * block_shift + k] = bsr_val[j * block_shift + k]; } ++bsc_col_ptr[col]; } } // Shift column pointer array for(rocsparse_int i = Nb; i > 0; --i) { bsc_col_ptr[i] = bsc_col_ptr[i - 1] + base; } bsc_col_ptr[0] = base; } template void host_gebsr_to_csr(rocsparse_direction direction, rocsparse_int mb, rocsparse_int nb, rocsparse_int nnzb, const std::vector& bsr_val, const std::vector& bsr_row_ptr, const std::vector& bsr_col_ind, rocsparse_int row_block_dim, rocsparse_int col_block_dim, rocsparse_index_base bsr_base, std::vector& csr_val, 
std::vector& csr_row_ptr, std::vector& csr_col_ind, rocsparse_index_base csr_base) { rocsparse_int m = mb * row_block_dim; size_t nnz = size_t(nnzb) * row_block_dim * col_block_dim; csr_row_ptr.resize(m + 1); csr_col_ind.resize(nnz); csr_val.resize(nnz); csr_row_ptr[0] = csr_base; if(nb == 0) { for(rocsparse_int i = 0; i < mb; ++i) { for(rocsparse_int r = 0; r < row_block_dim; ++r) { rocsparse_int row = i * row_block_dim + r; csr_row_ptr[row + 1] = csr_base; } } return; } for(rocsparse_int i = 0; i < mb; ++i) { rocsparse_int start = bsr_row_ptr[i] - bsr_base; rocsparse_int end = bsr_row_ptr[i + 1] - bsr_base; for(rocsparse_int k = start; k < end; ++k) { rocsparse_int j = bsr_col_ind[k] - bsr_base; for(rocsparse_int r = 0; r < row_block_dim; ++r) { for(rocsparse_int c = 0; c < col_block_dim; ++c) { rocsparse_int col = col_block_dim * j + c; rocsparse_int index = start * row_block_dim * col_block_dim + (end - start) * col_block_dim * r + (k - start) * col_block_dim + c; csr_col_ind[index] = col + csr_base; if(direction == rocsparse_direction_row) { csr_val[index] = bsr_val[k * row_block_dim * col_block_dim + col_block_dim * r + c]; } else { csr_val[index] = bsr_val[k * row_block_dim * col_block_dim + row_block_dim * c + r]; } } } } for(rocsparse_int r = 0; r < row_block_dim; ++r) { rocsparse_int row = i * row_block_dim + r; csr_row_ptr[row + 1] = csr_row_ptr[row] + (end - start) * col_block_dim; } } } template void host_gebsr_to_gebsr(rocsparse_direction direction, rocsparse_int mb, rocsparse_int nb, rocsparse_int nnzb, const std::vector& bsr_val_A, const std::vector& bsr_row_ptr_A, const std::vector& bsr_col_ind_A, rocsparse_int row_block_dim_A, rocsparse_int col_block_dim_A, rocsparse_index_base base_A, std::vector& bsr_val_C, std::vector& bsr_row_ptr_C, std::vector& bsr_col_ind_C, rocsparse_int row_block_dim_C, rocsparse_int col_block_dim_C, rocsparse_index_base base_C) { rocsparse_int m = mb * row_block_dim_A; rocsparse_int n = nb * col_block_dim_A; // convert 
GEBSR to CSR format std::vector csr_row_ptr; std::vector csr_col_ind; std::vector csr_val; host_gebsr_to_csr(direction, mb, nb, nnzb, bsr_val_A, bsr_row_ptr_A, bsr_col_ind_A, row_block_dim_A, col_block_dim_A, base_A, csr_val, csr_row_ptr, csr_col_ind, rocsparse_index_base_zero); rocsparse_int nnz = csr_row_ptr[m] - csr_row_ptr[0]; // convert CSR to GEBSR format host_csr_to_gebsr(direction, m, n, nnz, csr_val, csr_row_ptr, csr_col_ind, row_block_dim_C, col_block_dim_C, rocsparse_index_base_zero, bsr_val_C, bsr_row_ptr_C, bsr_col_ind_C, base_C); } template void host_bsr_to_bsc(rocsparse_int mb, rocsparse_int nb, rocsparse_int nnzb, rocsparse_int bsr_dim, const rocsparse_int* bsr_row_ptr, const rocsparse_int* bsr_col_ind, const T* bsr_val, std::vector& bsc_row_ind, std::vector& bsc_col_ptr, std::vector& bsc_val, rocsparse_index_base bsr_base, rocsparse_index_base bsc_base) { bsc_row_ind.resize(nnzb); bsc_col_ptr.resize(nb + 1, 0); bsc_val.resize(nnzb * bsr_dim * bsr_dim); // Determine nnz per column for(rocsparse_int i = 0; i < nnzb; ++i) { ++bsc_col_ptr[bsr_col_ind[i] + 1 - bsr_base]; } // Scan for(rocsparse_int i = 0; i < nb; ++i) { bsc_col_ptr[i + 1] += bsc_col_ptr[i]; } // Fill row indices and values for(rocsparse_int i = 0; i < mb; ++i) { rocsparse_int row_begin = bsr_row_ptr[i] - bsr_base; rocsparse_int row_end = bsr_row_ptr[i + 1] - bsr_base; for(rocsparse_int j = row_begin; j < row_end; ++j) { rocsparse_int col = bsr_col_ind[j] - bsr_base; rocsparse_int idx = bsc_col_ptr[col]; bsc_row_ind[idx] = i + bsc_base; for(rocsparse_int bi = 0; bi < bsr_dim; ++bi) { for(rocsparse_int bj = 0; bj < bsr_dim; ++bj) { bsc_val[bsr_dim * bsr_dim * idx + bi + bj * bsr_dim] = bsr_val[bsr_dim * bsr_dim * j + bi * bsr_dim + bj]; } } ++bsc_col_ptr[col]; } } // Shift column pointer array for(rocsparse_int i = nb; i > 0; --i) { bsc_col_ptr[i] = bsc_col_ptr[i - 1] + bsc_base; } bsc_col_ptr[0] = bsc_base; } template void host_csr_to_hyb(rocsparse_int M, rocsparse_int nnz, const 
std::vector& csr_row_ptr, const std::vector& csr_col_ind, const std::vector& csr_val, std::vector& ell_col_ind, std::vector& ell_val, rocsparse_int& ell_width, rocsparse_int& ell_nnz, std::vector& coo_row_ind, std::vector& coo_col_ind, std::vector& coo_val, rocsparse_int& coo_nnz, rocsparse_hyb_partition part, rocsparse_index_base base) { ell_nnz = 0; coo_nnz = 0; // Auto and user width if(part == rocsparse_hyb_partition_auto || part == rocsparse_hyb_partition_user) { // Determine ELL width ell_width = (part == rocsparse_hyb_partition_auto) ? (nnz - 1) / M + 1 : ell_width; // Determine COO nnz for(rocsparse_int i = 0; i < M; ++i) { rocsparse_int row_nnz = csr_row_ptr[i + 1] - csr_row_ptr[i]; if(row_nnz > ell_width) { coo_nnz += row_nnz - ell_width; } } } else if(part == rocsparse_hyb_partition_max) { ell_width = 0; // Determine max nnz per row for(rocsparse_int i = 0; i < M; ++i) { rocsparse_int row_nnz = csr_row_ptr[i + 1] - csr_row_ptr[i]; ell_width = std::max(ell_width, row_nnz); } } // ELL nnz ell_nnz = ell_width * M; // Allocate memory for HYB matrix if(ell_nnz > 0) { ell_col_ind.resize(ell_nnz); ell_val.resize(ell_nnz); } if(coo_nnz > 0) { coo_row_ind.resize(coo_nnz); coo_col_ind.resize(coo_nnz); coo_val.resize(coo_nnz); } // Fill HYB rocsparse_int coo_idx = 0; for(rocsparse_int i = 0; i < M; ++i) { rocsparse_int p = 0; rocsparse_int row_begin = csr_row_ptr[i] - base; rocsparse_int row_end = csr_row_ptr[i + 1] - base; rocsparse_int row_nnz = row_end - row_begin; for(rocsparse_int j = row_begin; j < row_end; ++j) { if(p < ell_width) { rocsparse_int idx = p++ * M + i; ell_col_ind[idx] = csr_col_ind[j]; ell_val[idx] = csr_val[j]; } else { coo_row_ind[coo_idx] = i + base; coo_col_ind[coo_idx] = csr_col_ind[j]; coo_val[coo_idx++] = csr_val[j]; } } for(rocsparse_int j = row_nnz; j < ell_width; ++j) { rocsparse_int idx = p++ * M + i; ell_col_ind[idx] = -1; ell_val[idx] = static_cast(0); } } } template void host_csr_to_csr_compress(rocsparse_int M, rocsparse_int N, 
rocsparse_int nnz, const std::vector& csr_row_ptr_A, const std::vector& csr_col_ind_A, const std::vector& csr_val_A, std::vector& csr_row_ptr_C, std::vector& csr_col_ind_C, std::vector& csr_val_C, rocsparse_index_base base, T tol) { if(M <= 0 || N <= 0) { return; } // find how many entries will be in each compressed CSR matrix row std::vector nnz_per_row(M); #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int i = 0; i < M; i++) { rocsparse_int start = csr_row_ptr_A[i] - base; rocsparse_int end = csr_row_ptr_A[i + 1] - base; rocsparse_int count = 0; for(rocsparse_int j = start; j < end; j++) { if(std::abs(csr_val_A[j]) > std::real(tol) && std::abs(csr_val_A[j]) > std::numeric_limits::min()) { count++; } } nnz_per_row[i] = count; } // add up total number of entries rocsparse_int nnz_C = 0; for(rocsparse_int i = 0; i < M; i++) { nnz_C += nnz_per_row[i]; } //column indices and value arrays for compressed CSR matrix csr_col_ind_C.resize(nnz_C); csr_val_C.resize(nnz_C); // fill in row pointer array for compressed CSR matrix csr_row_ptr_C.resize(M + 1); csr_row_ptr_C[0] = base; for(rocsparse_int i = 0; i < M; i++) { csr_row_ptr_C[i + 1] = csr_row_ptr_C[i] + nnz_per_row[i]; } // fill in column indices and value arrays for compressed CSR matrix #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int i = 0; i < M; i++) { rocsparse_int start = csr_row_ptr_A[i] - base; rocsparse_int end = csr_row_ptr_A[i + 1] - base; rocsparse_int index = csr_row_ptr_C[i] - base; for(rocsparse_int j = start; j < end; j++) { if(std::abs(csr_val_A[j]) > std::real(tol) && std::abs(csr_val_A[j]) > std::numeric_limits::min()) { csr_col_ind_C[index] = csr_col_ind_A[j]; csr_val_C[index] = csr_val_A[j]; index++; } } } } template void host_prune_csr_to_csr(rocsparse_int M, rocsparse_int N, rocsparse_int nnz_A, const std::vector& csr_row_ptr_A, const std::vector& csr_col_ind_A, const std::vector& csr_val_A, rocsparse_int& nnz_C, 
std::vector& csr_row_ptr_C, std::vector& csr_col_ind_C, std::vector& csr_val_C, rocsparse_index_base csr_base_A, rocsparse_index_base csr_base_C, T threshold) { csr_row_ptr_C.resize(M + 1, 0); csr_row_ptr_C[0] = csr_base_C; #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int i = 0; i < M; i++) { for(rocsparse_int j = csr_row_ptr_A[i] - csr_base_A; j < csr_row_ptr_A[i + 1] - csr_base_A; j++) { if(std::abs(csr_val_A[j]) > threshold && std::abs(csr_val_A[j]) > std::numeric_limits::min()) { csr_row_ptr_C[i + 1]++; } } } for(rocsparse_int i = 1; i <= M; i++) { csr_row_ptr_C[i] += csr_row_ptr_C[i - 1]; } nnz_C = csr_row_ptr_C[M] - csr_row_ptr_C[0]; csr_col_ind_C.resize(nnz_C); csr_val_C.resize(nnz_C); rocsparse_int index = 0; for(rocsparse_int i = 0; i < M; i++) { for(rocsparse_int j = csr_row_ptr_A[i] - csr_base_A; j < csr_row_ptr_A[i + 1] - csr_base_A; j++) { if(std::abs(csr_val_A[j]) > threshold && std::abs(csr_val_A[j]) > std::numeric_limits::min()) { csr_col_ind_C[index] = (csr_col_ind_A[j] - csr_base_A) + csr_base_C; csr_val_C[index] = csr_val_A[j]; index++; } } } } template void host_prune_csr_to_csr_by_percentage(rocsparse_int M, rocsparse_int N, rocsparse_int nnz_A, const std::vector& csr_row_ptr_A, const std::vector& csr_col_ind_A, const std::vector& csr_val_A, rocsparse_int& nnz_C, std::vector& csr_row_ptr_C, std::vector& csr_col_ind_C, std::vector& csr_val_C, rocsparse_index_base csr_base_A, rocsparse_index_base csr_base_C, T percentage) { rocsparse_int pos = std::ceil(nnz_A * (percentage / 100)) - 1; pos = std::min(pos, nnz_A - 1); pos = std::max(pos, 0); std::vector sorted_A(nnz_A); #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int i = 0; i < nnz_A; i++) { sorted_A[i] = std::abs(csr_val_A[i]); } std::sort(sorted_A.begin(), sorted_A.end()); T threshold = nnz_A != 0 ? 
sorted_A[pos] : static_cast(0); host_prune_csr_to_csr(M, N, nnz_A, csr_row_ptr_A, csr_col_ind_A, csr_val_A, nnz_C, csr_row_ptr_C, csr_col_ind_C, csr_val_C, csr_base_A, csr_base_C, threshold); } template void host_ell_to_csr(rocsparse_int M, rocsparse_int N, const std::vector& ell_col_ind, const std::vector& ell_val, rocsparse_int ell_width, std::vector& csr_row_ptr, std::vector& csr_col_ind, std::vector& csr_val, rocsparse_int& csr_nnz, rocsparse_index_base ell_base, rocsparse_index_base csr_base) { csr_row_ptr.resize(M + 1, 0); #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int i = 0; i < M; ++i) { for(rocsparse_int p = 0; p < ell_width; ++p) { rocsparse_int idx = p * M + i; rocsparse_int col = ell_col_ind[idx] - ell_base; if(col >= 0 && col < N) { ++csr_row_ptr[i]; } } } // Determine row pointers csr_nnz = csr_base; for(rocsparse_int i = 0; i < M; ++i) { rocsparse_int tmp = csr_row_ptr[i]; csr_row_ptr[i] = csr_nnz; csr_nnz += tmp; } csr_row_ptr[M] = csr_nnz; csr_nnz -= csr_base; // Allocate memory for columns and values csr_col_ind.resize(csr_nnz); csr_val.resize(csr_nnz); // Fill CSR structure #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(rocsparse_int i = 0; i < M; ++i) { rocsparse_int csr_idx = csr_row_ptr[i] - csr_base; for(rocsparse_int p = 0; p < ell_width; ++p) { rocsparse_int idx = p * M + i; rocsparse_int col = ell_col_ind[idx] - ell_base; if(col >= 0 && col < N) { csr_col_ind[csr_idx] = col + csr_base; csr_val[csr_idx] = ell_val[idx]; ++csr_idx; } } } } template void host_coosort_by_column(rocsparse_int M, rocsparse_int nnz, std::vector& coo_row_ind, std::vector& coo_col_ind, std::vector& coo_val) { // Permutation vector std::vector perm(nnz); for(rocsparse_int i = 0; i < nnz; ++i) { perm[i] = i; } std::vector tmp_row(nnz); std::vector tmp_col(nnz); std::vector tmp_val(nnz); tmp_row = coo_row_ind; tmp_col = coo_col_ind; tmp_val = coo_val; // Sort std::sort(perm.begin(), perm.end(), 
[&](const rocsparse_int& a, const rocsparse_int& b) { if(tmp_col[a] < tmp_col[b]) { return true; } else if(tmp_col[a] == tmp_col[b]) { return (tmp_row[a] < tmp_row[b]); } else { return false; } }); for(rocsparse_int i = 0; i < nnz; ++i) { coo_row_ind[i] = tmp_row[perm[i]]; coo_col_ind[i] = tmp_col[perm[i]]; coo_val[i] = tmp_val[perm[i]]; } } template void host_bsrpad_value(rocsparse_int m, rocsparse_int mb, rocsparse_int nnzb, rocsparse_int block_dim, T value, T* __restrict__ bsr_val, const rocsparse_int* __restrict__ bsr_row_ptr, const rocsparse_int* __restrict__ bsr_col_ind, rocsparse_index_base bsr_base) { rocsparse_int start_local_index = m % block_dim; rocsparse_int start = bsr_row_ptr[mb - 1] - bsr_base; rocsparse_int end = bsr_row_ptr[mb] - bsr_base; if((start_local_index > 0) && (end - start > 0)) { if((bsr_col_ind[end - 1] - bsr_base) == (mb - 1)) { // then we pad. for(rocsparse_int i = start_local_index; i < block_dim; ++i) { bsr_val[(end - 1) * block_dim * block_dim + i * block_dim + i] = value; } } else { // search for diagonal block for(rocsparse_int index = start; index < end; index++) { if((bsr_col_ind[index] - bsr_base) == (mb - 1)) { // then we pad. 
for(int i = start_local_index; i < block_dim; ++i) { bsr_val[index * block_dim * block_dim + i * block_dim + i] = value; } break; } } } } } // INSTANTIATE template struct rocsparse_host; template struct rocsparse_host; template struct rocsparse_host; template struct rocsparse_host; template struct rocsparse_host; template struct rocsparse_host; template struct rocsparse_host; template struct rocsparse_host; template struct rocsparse_host; template struct rocsparse_host; template struct rocsparse_host; template struct rocsparse_host; /* * =========================================================================== * level 1 SPARSE * =========================================================================== */ template void host_gthrz(rocsparse_int nnz, float* y, float* x_val, const rocsparse_int* x_ind, rocsparse_index_base base); /* * =========================================================================== * level 2 SPARSE * =========================================================================== */ template void host_bsrxmv(rocsparse_direction dir, rocsparse_operation trans, rocsparse_int size_of_mask, rocsparse_int mb, rocsparse_int nb, rocsparse_int nnzb, float alpha, const rocsparse_int* bsr_mask_ptr, const rocsparse_int* bsr_row_ptr, const rocsparse_int* bsr_end_ptr, const rocsparse_int* bsr_col_ind, const float* bsr_val, rocsparse_int bsr_dim, const float* x, float beta, float* y, rocsparse_index_base base); template void host_bsrsv(rocsparse_operation trans, rocsparse_direction dir, rocsparse_int mb, rocsparse_int nnzb, float alpha, const rocsparse_int* bsr_row_ptr, const rocsparse_int* bsr_col_ind, const float* bsr_val, rocsparse_int bsr_dim, const float* x, float* y, rocsparse_diag_type diag_type, rocsparse_fill_mode fill_mode, rocsparse_index_base base, rocsparse_int* struct_pivot, rocsparse_int* numeric_pivot); template void host_hybmv(rocsparse_operation trans, rocsparse_int M, rocsparse_int N, float alpha, rocsparse_int ell_nnz, const 
rocsparse_int* ell_col_ind, const float* ell_val, rocsparse_int ell_width, rocsparse_int coo_nnz, const rocsparse_int* coo_row_ind, const rocsparse_int* coo_col_ind, const float* coo_val, const float* x, float beta, float* y, rocsparse_index_base base); template void host_gebsrmv(rocsparse_direction dir, rocsparse_operation trans, rocsparse_int mb, rocsparse_int nb, rocsparse_int nnzb, float alpha, const rocsparse_int* bsr_row_ptr, const rocsparse_int* bsr_col_ind, const float* bsr_val, rocsparse_int row_block_dim, rocsparse_int col_block_dim, const float* x, float beta, float* y, rocsparse_index_base base); /* * =========================================================================== * level 3 SPARSE * =========================================================================== */ template void host_bsrmm(rocsparse_handle handle, rocsparse_direction dir, rocsparse_operation transA, rocsparse_operation transB, rocsparse_int Mb, rocsparse_int N, rocsparse_int Kb, rocsparse_int nnzb, const float* alpha, const rocsparse_mat_descr descr, const float* bsr_val_A, const rocsparse_int* bsr_row_ptr_A, const rocsparse_int* bsr_col_ind_A, rocsparse_int block_dim, const float* B, rocsparse_int ldb, const float* beta, float* C, rocsparse_int ldc); template void host_gebsrmm(rocsparse_handle handle, rocsparse_direction dir, rocsparse_operation trans_A, rocsparse_operation trans_B, rocsparse_int mb, rocsparse_int n, rocsparse_int kb, rocsparse_int nnzb, const float* alpha, const rocsparse_mat_descr descr, const float* bsr_val, const rocsparse_int* bsr_row_ptr, const rocsparse_int* bsr_col_ind, rocsparse_int row_block_dim, rocsparse_int col_block_dim, const float* B, rocsparse_int ldb, const float* beta, float* C, rocsparse_int ldc); template void host_gebsrmm(rocsparse_handle handle, rocsparse_direction dir, rocsparse_operation trans_A, rocsparse_operation trans_B, rocsparse_int mb, rocsparse_int n, rocsparse_int kb, rocsparse_int nnzb, const double* alpha, const 
rocsparse_mat_descr descr, const double* bsr_val, const rocsparse_int* bsr_row_ptr, const rocsparse_int* bsr_col_ind, rocsparse_int row_block_dim, rocsparse_int col_block_dim, const double* B, rocsparse_int ldb, const double* beta, double* C, rocsparse_int ldc); template void host_gebsrmm(rocsparse_handle handle, rocsparse_direction dir, rocsparse_operation trans_A, rocsparse_operation trans_B, rocsparse_int mb, rocsparse_int n, rocsparse_int kb, rocsparse_int nnzb, const rocsparse_float_complex* alpha, const rocsparse_mat_descr descr, const rocsparse_float_complex* bsr_val, const rocsparse_int* bsr_row_ptr, const rocsparse_int* bsr_col_ind, rocsparse_int row_block_dim, rocsparse_int col_block_dim, const rocsparse_float_complex* B, rocsparse_int ldb, const rocsparse_float_complex* beta, rocsparse_float_complex* C, rocsparse_int ldc); template void host_gebsrmm(rocsparse_handle handle, rocsparse_direction dir, rocsparse_operation trans_A, rocsparse_operation trans_B, rocsparse_int mb, rocsparse_int n, rocsparse_int kb, rocsparse_int nnzb, const rocsparse_double_complex* alpha, const rocsparse_mat_descr descr, const rocsparse_double_complex* bsr_val, const rocsparse_int* bsr_row_ptr, const rocsparse_int* bsr_col_ind, rocsparse_int row_block_dim, rocsparse_int col_block_dim, const rocsparse_double_complex* B, rocsparse_int ldb, const rocsparse_double_complex* beta, rocsparse_double_complex* C, rocsparse_int ldc); template void host_bsrsm(rocsparse_int mb, rocsparse_int nrhs, rocsparse_int nnzb, rocsparse_direction dir, rocsparse_operation transA, rocsparse_operation transX, float alpha, const rocsparse_int* bsr_row_ptr, const rocsparse_int* bsr_col_ind, const float* bsr_val, rocsparse_int bsr_dim, const float* B, rocsparse_int ldb, float* X, rocsparse_int ldx, rocsparse_diag_type diag_type, rocsparse_fill_mode fill_mode, rocsparse_index_base base, rocsparse_int* struct_pivot, rocsparse_int* numeric_pivot); template void host_gemmi(rocsparse_int M, rocsparse_int N, 
rocsparse_operation transA, rocsparse_operation transB, float alpha, const float* A, rocsparse_int lda, const rocsparse_int* csr_row_ptr, const rocsparse_int* csr_col_ind, const float* csr_val, float beta, float* C, rocsparse_int ldc, rocsparse_index_base base); /* * =========================================================================== * extra SPARSE * =========================================================================== */ template void host_bsrgeam_nnzb(rocsparse_direction dir, rocsparse_int Mb, rocsparse_int Nb, rocsparse_int block_dim, float alpha, const rocsparse_int* bsr_row_ptr_A, const rocsparse_int* bsr_col_ind_A, float beta, const rocsparse_int* bsr_row_ptr_B, const rocsparse_int* bsr_col_ind_B, rocsparse_int* bsr_row_ptr_C, rocsparse_int* nnzb_C, rocsparse_index_base base_A, rocsparse_index_base base_B, rocsparse_index_base base_C); template void host_bsrgeam(rocsparse_direction dir, rocsparse_int Mb, rocsparse_int Nb, rocsparse_int block_dim, float alpha, const rocsparse_int* bsr_row_ptr_A, const rocsparse_int* bsr_col_ind_A, const float* bsr_val_A, float beta, const rocsparse_int* bsr_row_ptr_B, const rocsparse_int* bsr_col_ind_B, const float* bsr_val_B, const rocsparse_int* bsr_row_ptr_C, rocsparse_int* bsr_col_ind_C, float* bsr_val_C, rocsparse_index_base base_A, rocsparse_index_base base_B, rocsparse_index_base base_C); template void host_csrgeam_nnz(rocsparse_int M, rocsparse_int N, float alpha, const rocsparse_int* csr_row_ptr_A, const rocsparse_int* csr_col_ind_A, float beta, const rocsparse_int* csr_row_ptr_B, const rocsparse_int* csr_col_ind_B, rocsparse_int* csr_row_ptr_C, rocsparse_int* nnz_C, rocsparse_index_base base_A, rocsparse_index_base base_B, rocsparse_index_base base_C); template void host_csrgeam(rocsparse_int M, rocsparse_int N, float alpha, const rocsparse_int* csr_row_ptr_A, const rocsparse_int* csr_col_ind_A, const float* csr_val_A, float beta, const rocsparse_int* csr_row_ptr_B, const rocsparse_int* csr_col_ind_B, 
const float* csr_val_B, const rocsparse_int* csr_row_ptr_C, rocsparse_int* csr_col_ind_C, float* csr_val_C, rocsparse_index_base base_A, rocsparse_index_base base_B, rocsparse_index_base base_C); /* * =========================================================================== * precond SPARSE * =========================================================================== */ template void host_bsric0(rocsparse_direction direction, rocsparse_int Mb, rocsparse_int block_dim, const std::vector& bsr_row_ptr, const std::vector& bsr_col_ind, std::vector& bsr_val, rocsparse_index_base base, rocsparse_int* struct_pivot, rocsparse_int* numeric_pivot); template void host_bsrilu0(rocsparse_direction dir, rocsparse_int mb, const std::vector& bsr_row_ptr, const std::vector& bsr_col_ind, std::vector& bsr_val, rocsparse_int bsr_dim, rocsparse_index_base base, rocsparse_int* struct_pivot, rocsparse_int* numeric_pivot, bool boost, float boost_tol, float boost_val); template void host_csric0(rocsparse_int M, const std::vector& csr_row_ptr, const std::vector& csr_col_ind, std::vector& csr_val, rocsparse_index_base base, rocsparse_int* struct_pivot, rocsparse_int* numeric_pivot); template void host_csrilu0(rocsparse_int M, const std::vector& csr_row_ptr, const std::vector& csr_col_ind, std::vector& csr_val, rocsparse_index_base base, rocsparse_int* struct_pivot, rocsparse_int* numeric_pivot, bool boost, float boost_tol, float boost_val); template void host_gtsv_no_pivot(rocsparse_int m, rocsparse_int n, const std::vector& dl, const std::vector& d, const std::vector& du, std::vector& B, rocsparse_int ldb); template void host_gtsv_no_pivot_strided_batch(rocsparse_int m, const std::vector& dl, const std::vector& d, const std::vector& du, std::vector& x, rocsparse_int batch_count, rocsparse_int batch_stride); template void host_gtsv_interleaved_batch(rocsparse_gtsv_interleaved_alg algo, rocsparse_int m, const float* dl, const float* d, const float* du, float* x, rocsparse_int batch_count, 
rocsparse_int batch_stride); template void host_gpsv_interleaved_batch(rocsparse_gpsv_interleaved_alg algo, rocsparse_int m, float* ds, float* dl, float* d, float* du, float* dw, float* x, rocsparse_int batch_count, rocsparse_int batch_stride); /* * =========================================================================== * conversion SPARSE * =========================================================================== */ template rocsparse_status host_nnz(rocsparse_direction dirA, rocsparse_int m, rocsparse_int n, const float* A, rocsparse_int lda, rocsparse_int* nnz_per_row_columns, rocsparse_int* nnz_total_dev_host_ptr); template void host_prune_dense2csr(rocsparse_int m, rocsparse_int n, const std::vector& A, rocsparse_int lda, rocsparse_index_base base, float threshold, rocsparse_int& nnz, std::vector& csr_val, std::vector& csr_row_ptr, std::vector& csr_col_ind); template void host_prune_dense2csr_by_percentage(rocsparse_int m, rocsparse_int n, const std::vector& A, rocsparse_int lda, rocsparse_index_base base, float percentage, rocsparse_int& nnz, std::vector& csr_val, std::vector& csr_row_ptr, std::vector& csr_col_ind); template void host_csr_to_gebsr(rocsparse_direction direction, rocsparse_int m, rocsparse_int n, rocsparse_int nnz, const std::vector& csr_val, const std::vector& csr_row_ptr, const std::vector& csr_col_ind, rocsparse_int row_block_dim, rocsparse_int col_block_dim, rocsparse_index_base csr_base, std::vector& bsr_val, std::vector& bsr_row_ptr, std::vector& bsr_col_ind, rocsparse_index_base bsr_base); template void host_gebsr_to_gebsc(rocsparse_int Mb, rocsparse_int Nb, rocsparse_int nnzb, const std::vector& bsr_row_ptr, const std::vector& bsr_col_ind, const std::vector& bsr_val, rocsparse_int row_block_dim, rocsparse_int col_block_dim, std::vector& bsc_row_ind, std::vector& bsc_col_ptr, std::vector& bsc_val, rocsparse_action action, rocsparse_index_base base); template void host_gebsr_to_csr(rocsparse_direction direction, rocsparse_int mb, 
rocsparse_int nb, rocsparse_int nnzb, const std::vector& bsr_val, const std::vector& bsr_row_ptr, const std::vector& bsr_col_ind, rocsparse_int row_block_dim, rocsparse_int col_block_dim, rocsparse_index_base bsr_base, std::vector& csr_val, std::vector& csr_row_ptr, std::vector& csr_col_ind, rocsparse_index_base csr_base); template void host_gebsr_to_gebsr(rocsparse_direction direction, rocsparse_int mb, rocsparse_int nb, rocsparse_int nnzb, const std::vector& bsr_val_A, const std::vector& bsr_row_ptr_A, const std::vector& bsr_col_ind_A, rocsparse_int row_block_dim_A, rocsparse_int col_block_dim_A, rocsparse_index_base base_A, std::vector& bsr_val_C, std::vector& bsr_row_ptr_C, std::vector& bsr_col_ind_C, rocsparse_int row_block_dim_C, rocsparse_int col_block_dim_C, rocsparse_index_base base_C); template void host_bsr_to_bsc(rocsparse_int mb, rocsparse_int nb, rocsparse_int nnzb, rocsparse_int bsr_dim, const rocsparse_int* bsr_row_ptr, const rocsparse_int* bsr_col_ind, const float* bsr_val, std::vector& bsc_row_ind, std::vector& bsc_col_ptr, std::vector& bsc_val, rocsparse_index_base bsr_base, rocsparse_index_base bsc_base); template void host_csr_to_hyb(rocsparse_int M, rocsparse_int nnz, const std::vector& csr_row_ptr, const std::vector& csr_col_ind, const std::vector& csr_val, std::vector& ell_col_ind, std::vector& ell_val, rocsparse_int& ell_width, rocsparse_int& ell_nnz, std::vector& coo_row_ind, std::vector& coo_col_ind, std::vector& coo_val, rocsparse_int& coo_nnz, rocsparse_hyb_partition part, rocsparse_index_base base); template void host_csr_to_csr_compress(rocsparse_int M, rocsparse_int N, rocsparse_int nnz, const std::vector& csr_row_ptr_A, const std::vector& csr_col_ind_A, const std::vector& csr_val_A, std::vector& csr_row_ptr_C, std::vector& csr_col_ind_C, std::vector& csr_val_C, rocsparse_index_base base, float tol); template void host_prune_csr_to_csr(rocsparse_int M, rocsparse_int N, rocsparse_int nnz_A, const std::vector& csr_row_ptr_A, const 
std::vector& csr_col_ind_A, const std::vector& csr_val_A, rocsparse_int& nnz_C, std::vector& csr_row_ptr_C, std::vector& csr_col_ind_C, std::vector& csr_val_C, rocsparse_index_base csr_base_A, rocsparse_index_base csr_base_C, float threshold); template void host_prune_csr_to_csr_by_percentage(rocsparse_int M, rocsparse_int N, rocsparse_int nnz_A, const std::vector& csr_row_ptr_A, const std::vector& csr_col_ind_A, const std::vector& csr_val_A, rocsparse_int& nnz_C, std::vector& csr_row_ptr_C, std::vector& csr_col_ind_C, std::vector& csr_val_C, rocsparse_index_base csr_base_A, rocsparse_index_base csr_base_C, float percentage); template void host_ell_to_csr(rocsparse_int M, rocsparse_int N, const std::vector& ell_col_ind, const std::vector& ell_val, rocsparse_int ell_width, std::vector& csr_row_ptr, std::vector& csr_col_ind, std::vector& csr_val, rocsparse_int& csr_nnz, rocsparse_index_base ell_base, rocsparse_index_base csr_base); template void host_coosort_by_column(rocsparse_int M, rocsparse_int nnz, std::vector& coo_row_ind, std::vector& coo_col_ind, std::vector& coo_val); // DOUBLE /* * =========================================================================== * level 1 SPARSE * =========================================================================== */ template void host_gthrz(rocsparse_int nnz, double* y, double* x_val, const rocsparse_int* x_ind, rocsparse_index_base base); /* * =========================================================================== * level 2 SPARSE * =========================================================================== */ template void host_bsrxmv(rocsparse_direction dir, rocsparse_operation trans, rocsparse_int size_of_mask, rocsparse_int mb, rocsparse_int nb, rocsparse_int nnzb, double alpha, const rocsparse_int* bsr_mask_ptr, const rocsparse_int* bsr_row_ptr, const rocsparse_int* bsr_end_ptr, const rocsparse_int* bsr_col_ind, const double* bsr_val, rocsparse_int bsr_dim, const double* x, double beta, double* y, 
rocsparse_index_base base); template void host_bsrsv(rocsparse_operation trans, rocsparse_direction dir, rocsparse_int mb, rocsparse_int nnzb, double alpha, const rocsparse_int* bsr_row_ptr, const rocsparse_int* bsr_col_ind, const double* bsr_val, rocsparse_int bsr_dim, const double* x, double* y, rocsparse_diag_type diag_type, rocsparse_fill_mode fill_mode, rocsparse_index_base base, rocsparse_int* struct_pivot, rocsparse_int* numeric_pivot); template void host_hybmv(rocsparse_operation trans, rocsparse_int M, rocsparse_int N, double alpha, rocsparse_int ell_nnz, const rocsparse_int* ell_col_ind, const double* ell_val, rocsparse_int ell_width, rocsparse_int coo_nnz, const rocsparse_int* coo_row_ind, const rocsparse_int* coo_col_ind, const double* coo_val, const double* x, double beta, double* y, rocsparse_index_base base); template void host_gebsrmv(rocsparse_direction dir, rocsparse_operation trans, rocsparse_int mb, rocsparse_int nb, rocsparse_int nnzb, double alpha, const rocsparse_int* bsr_row_ptr, const rocsparse_int* bsr_col_ind, const double* bsr_val, rocsparse_int row_block_dim, rocsparse_int col_block_dim, const double* x, double beta, double* y, rocsparse_index_base base); /* * =========================================================================== * level 3 SPARSE * =========================================================================== */ template void host_bsrmm(rocsparse_handle handle, rocsparse_direction dir, rocsparse_operation transA, rocsparse_operation transB, rocsparse_int Mb, rocsparse_int N, rocsparse_int Kb, rocsparse_int nnzb, const double* alpha, const rocsparse_mat_descr descr, const double* bsr_val_A, const rocsparse_int* bsr_row_ptr_A, const rocsparse_int* bsr_col_ind_A, rocsparse_int block_dim, const double* B, rocsparse_int ldb, const double* beta, double* C, rocsparse_int ldc); template void host_bsrsm(rocsparse_int mb, rocsparse_int nrhs, rocsparse_int nnzb, rocsparse_direction dir, rocsparse_operation transA, 
rocsparse_operation transX, double alpha, const rocsparse_int* bsr_row_ptr, const rocsparse_int* bsr_col_ind, const double* bsr_val, rocsparse_int bsr_dim, const double* B, rocsparse_int ldb, double* X, rocsparse_int ldx, rocsparse_diag_type diag_type, rocsparse_fill_mode fill_mode, rocsparse_index_base base, rocsparse_int* struct_pivot, rocsparse_int* numeric_pivot); template void host_gemmi(rocsparse_int M, rocsparse_int N, rocsparse_operation transA, rocsparse_operation transB, double alpha, const double* A, rocsparse_int lda, const rocsparse_int* csr_row_ptr, const rocsparse_int* csr_col_ind, const double* csr_val, double beta, double* C, rocsparse_int ldc, rocsparse_index_base base); /* * =========================================================================== * extra SPARSE * =========================================================================== */ template void host_bsrgeam_nnzb(rocsparse_direction dir, rocsparse_int Mb, rocsparse_int Nb, rocsparse_int block_dim, double alpha, const rocsparse_int* bsr_row_ptr_A, const rocsparse_int* bsr_col_ind_A, double beta, const rocsparse_int* bsr_row_ptr_B, const rocsparse_int* bsr_col_ind_B, rocsparse_int* bsr_row_ptr_C, rocsparse_int* nnzb_C, rocsparse_index_base base_A, rocsparse_index_base base_B, rocsparse_index_base base_C); template void host_bsrgeam(rocsparse_direction dir, rocsparse_int Mb, rocsparse_int Nb, rocsparse_int block_dim, double alpha, const rocsparse_int* bsr_row_ptr_A, const rocsparse_int* bsr_col_ind_A, const double* bsr_val_A, double beta, const rocsparse_int* bsr_row_ptr_B, const rocsparse_int* bsr_col_ind_B, const double* bsr_val_B, const rocsparse_int* bsr_row_ptr_C, rocsparse_int* bsr_col_ind_C, double* bsr_val_C, rocsparse_index_base base_A, rocsparse_index_base base_B, rocsparse_index_base base_C); template void host_csrgeam_nnz(rocsparse_int M, rocsparse_int N, double alpha, const rocsparse_int* csr_row_ptr_A, const rocsparse_int* csr_col_ind_A, double beta, const rocsparse_int* 
csr_row_ptr_B, const rocsparse_int* csr_col_ind_B, rocsparse_int* csr_row_ptr_C, rocsparse_int* nnz_C, rocsparse_index_base base_A, rocsparse_index_base base_B, rocsparse_index_base base_C); template void host_csrgeam(rocsparse_int M, rocsparse_int N, double alpha, const rocsparse_int* csr_row_ptr_A, const rocsparse_int* csr_col_ind_A, const double* csr_val_A, double beta, const rocsparse_int* csr_row_ptr_B, const rocsparse_int* csr_col_ind_B, const double* csr_val_B, const rocsparse_int* csr_row_ptr_C, rocsparse_int* csr_col_ind_C, double* csr_val_C, rocsparse_index_base base_A, rocsparse_index_base base_B, rocsparse_index_base base_C); /* * =========================================================================== * precond SPARSE * =========================================================================== */ template void host_bsric0(rocsparse_direction direction, rocsparse_int Mb, rocsparse_int block_dim, const std::vector& bsr_row_ptr, const std::vector& bsr_col_ind, std::vector& bsr_val, rocsparse_index_base base, rocsparse_int* struct_pivot, rocsparse_int* numeric_pivot); template void host_bsrilu0(rocsparse_direction dir, rocsparse_int mb, const std::vector& bsr_row_ptr, const std::vector& bsr_col_ind, std::vector& bsr_val, rocsparse_int bsr_dim, rocsparse_index_base base, rocsparse_int* struct_pivot, rocsparse_int* numeric_pivot, bool boost, double boost_tol, double boost_val); template void host_csric0(rocsparse_int M, const std::vector& csr_row_ptr, const std::vector& csr_col_ind, std::vector& csr_val, rocsparse_index_base base, rocsparse_int* struct_pivot, rocsparse_int* numeric_pivot); template void host_csrilu0(rocsparse_int M, const std::vector& csr_row_ptr, const std::vector& csr_col_ind, std::vector& csr_val, rocsparse_index_base base, rocsparse_int* struct_pivot, rocsparse_int* numeric_pivot, bool boost, double boost_tol, double boost_val); template void host_gtsv_no_pivot(rocsparse_int m, rocsparse_int n, const std::vector& dl, const 
std::vector& d, const std::vector& du, std::vector& B, rocsparse_int ldb); template void host_gtsv_no_pivot_strided_batch(rocsparse_int m, const std::vector& dl, const std::vector& d, const std::vector& du, std::vector& x, rocsparse_int batch_count, rocsparse_int batch_stride); template void host_gtsv_interleaved_batch(rocsparse_gtsv_interleaved_alg algo, rocsparse_int m, const double* dl, const double* d, const double* du, double* x, rocsparse_int batch_count, rocsparse_int batch_stride); template void host_gpsv_interleaved_batch(rocsparse_gpsv_interleaved_alg algo, rocsparse_int m, double* ds, double* dl, double* d, double* du, double* dw, double* x, rocsparse_int batch_count, rocsparse_int batch_stride); /* * =========================================================================== * conversion SPARSE * =========================================================================== */ template rocsparse_status host_nnz(rocsparse_direction dirA, rocsparse_int m, rocsparse_int n, const double* A, rocsparse_int lda, rocsparse_int* nnz_per_row_columns, rocsparse_int* nnz_total_dev_host_ptr); template void host_prune_dense2csr(rocsparse_int m, rocsparse_int n, const std::vector& A, rocsparse_int lda, rocsparse_index_base base, double threshold, rocsparse_int& nnz, std::vector& csr_val, std::vector& csr_row_ptr, std::vector& csr_col_ind); template void host_prune_dense2csr_by_percentage(rocsparse_int m, rocsparse_int n, const std::vector& A, rocsparse_int lda, rocsparse_index_base base, double percentage, rocsparse_int& nnz, std::vector& csr_val, std::vector& csr_row_ptr, std::vector& csr_col_ind); template void host_csr_to_gebsr(rocsparse_direction direction, rocsparse_int m, rocsparse_int n, rocsparse_int nnz, const std::vector& csr_val, const std::vector& csr_row_ptr, const std::vector& csr_col_ind, rocsparse_int row_block_dim, rocsparse_int col_block_dim, rocsparse_index_base csr_base, std::vector& bsr_val, std::vector& bsr_row_ptr, std::vector& bsr_col_ind, 
rocsparse_index_base bsr_base); template void host_gebsr_to_gebsc(rocsparse_int Mb, rocsparse_int Nb, rocsparse_int nnzb, const std::vector& bsr_row_ptr, const std::vector& bsr_col_ind, const std::vector& bsr_val, rocsparse_int row_block_dim, rocsparse_int col_block_dim, std::vector& bsc_row_ind, std::vector& bsc_col_ptr, std::vector& bsc_val, rocsparse_action action, rocsparse_index_base base); template void host_gebsr_to_csr(rocsparse_direction direction, rocsparse_int mb, rocsparse_int nb, rocsparse_int nnzb, const std::vector& bsr_val, const std::vector& bsr_row_ptr, const std::vector& bsr_col_ind, rocsparse_int row_block_dim, rocsparse_int col_block_dim, rocsparse_index_base bsr_base, std::vector& csr_val, std::vector& csr_row_ptr, std::vector& csr_col_ind, rocsparse_index_base csr_base); template void host_gebsr_to_gebsr(rocsparse_direction direction, rocsparse_int mb, rocsparse_int nb, rocsparse_int nnzb, const std::vector& bsr_val_A, const std::vector& bsr_row_ptr_A, const std::vector& bsr_col_ind_A, rocsparse_int row_block_dim_A, rocsparse_int col_block_dim_A, rocsparse_index_base base_A, std::vector& bsr_val_C, std::vector& bsr_row_ptr_C, std::vector& bsr_col_ind_C, rocsparse_int row_block_dim_C, rocsparse_int col_block_dim_C, rocsparse_index_base base_C); template void host_bsr_to_bsc(rocsparse_int mb, rocsparse_int nb, rocsparse_int nnzb, rocsparse_int bsr_dim, const rocsparse_int* bsr_row_ptr, const rocsparse_int* bsr_col_ind, const double* bsr_val, std::vector& bsc_row_ind, std::vector& bsc_col_ptr, std::vector& bsc_val, rocsparse_index_base bsr_base, rocsparse_index_base bsc_base); template void host_csr_to_hyb(rocsparse_int M, rocsparse_int nnz, const std::vector& csr_row_ptr, const std::vector& csr_col_ind, const std::vector& csr_val, std::vector& ell_col_ind, std::vector& ell_val, rocsparse_int& ell_width, rocsparse_int& ell_nnz, std::vector& coo_row_ind, std::vector& coo_col_ind, std::vector& coo_val, rocsparse_int& coo_nnz, 
rocsparse_hyb_partition part, rocsparse_index_base base); template void host_csr_to_csr_compress(rocsparse_int M, rocsparse_int N, rocsparse_int nnz, const std::vector& csr_row_ptr_A, const std::vector& csr_col_ind_A, const std::vector& csr_val_A, std::vector& csr_row_ptr_C, std::vector& csr_col_ind_C, std::vector& csr_val_C, rocsparse_index_base base, double tol); template void host_prune_csr_to_csr(rocsparse_int M, rocsparse_int N, rocsparse_int nnz_A, const std::vector& csr_row_ptr_A, const std::vector& csr_col_ind_A, const std::vector& csr_val_A, rocsparse_int& nnz_C, std::vector& csr_row_ptr_C, std::vector& csr_col_ind_C, std::vector& csr_val_C, rocsparse_index_base csr_base_A, rocsparse_index_base csr_base_C, double threshold); template void host_prune_csr_to_csr_by_percentage(rocsparse_int M, rocsparse_int N, rocsparse_int nnz_A, const std::vector& csr_row_ptr_A, const std::vector& csr_col_ind_A, const std::vector& csr_val_A, rocsparse_int& nnz_C, std::vector& csr_row_ptr_C, std::vector& csr_col_ind_C, std::vector& csr_val_C, rocsparse_index_base csr_base_A, rocsparse_index_base csr_base_C, double percentage); template void host_ell_to_csr(rocsparse_int M, rocsparse_int N, const std::vector& ell_col_ind, const std::vector& ell_val, rocsparse_int ell_width, std::vector& csr_row_ptr, std::vector& csr_col_ind, std::vector& csr_val, rocsparse_int& csr_nnz, rocsparse_index_base ell_base, rocsparse_index_base csr_base); template void host_coosort_by_column(rocsparse_int M, rocsparse_int nnz, std::vector& coo_row_ind, std::vector& coo_col_ind, std::vector& coo_val); // ROCSPARSE_DOUBLE_COMPLEX /* * =========================================================================== * level 1 SPARSE * =========================================================================== */ template void host_gthrz(rocsparse_int nnz, rocsparse_double_complex* y, rocsparse_double_complex* x_val, const rocsparse_int* x_ind, rocsparse_index_base base); /* * 
=========================================================================== * level 2 SPARSE * =========================================================================== */ template void host_bsrxmv(rocsparse_direction dir, rocsparse_operation trans, rocsparse_int size_of_mask, rocsparse_int mb, rocsparse_int nb, rocsparse_int nnzb, rocsparse_double_complex alpha, const rocsparse_int* bsr_mask_ptr, const rocsparse_int* bsr_row_ptr, const rocsparse_int* bsr_end_ptr, const rocsparse_int* bsr_col_ind, const rocsparse_double_complex* bsr_val, rocsparse_int bsr_dim, const rocsparse_double_complex* x, rocsparse_double_complex beta, rocsparse_double_complex* y, rocsparse_index_base base); template void host_bsrsv(rocsparse_operation trans, rocsparse_direction dir, rocsparse_int mb, rocsparse_int nnzb, rocsparse_double_complex alpha, const rocsparse_int* bsr_row_ptr, const rocsparse_int* bsr_col_ind, const rocsparse_double_complex* bsr_val, rocsparse_int bsr_dim, const rocsparse_double_complex* x, rocsparse_double_complex* y, rocsparse_diag_type diag_type, rocsparse_fill_mode fill_mode, rocsparse_index_base base, rocsparse_int* struct_pivot, rocsparse_int* numeric_pivot); template void host_hybmv(rocsparse_operation trans, rocsparse_int M, rocsparse_int N, rocsparse_double_complex alpha, rocsparse_int ell_nnz, const rocsparse_int* ell_col_ind, const rocsparse_double_complex* ell_val, rocsparse_int ell_width, rocsparse_int coo_nnz, const rocsparse_int* coo_row_ind, const rocsparse_int* coo_col_ind, const rocsparse_double_complex* coo_val, const rocsparse_double_complex* x, rocsparse_double_complex beta, rocsparse_double_complex* y, rocsparse_index_base base); template void host_gebsrmv(rocsparse_direction dir, rocsparse_operation trans, rocsparse_int mb, rocsparse_int nb, rocsparse_int nnzb, rocsparse_double_complex alpha, const rocsparse_int* bsr_row_ptr, const rocsparse_int* bsr_col_ind, const rocsparse_double_complex* bsr_val, rocsparse_int row_block_dim, rocsparse_int 
col_block_dim, const rocsparse_double_complex* x, rocsparse_double_complex beta, rocsparse_double_complex* y, rocsparse_index_base base); /* * =========================================================================== * level 3 SPARSE * =========================================================================== */ template void host_bsrmm(rocsparse_handle handle, rocsparse_direction dir, rocsparse_operation transA, rocsparse_operation transB, rocsparse_int Mb, rocsparse_int N, rocsparse_int Kb, rocsparse_int nnzb, const rocsparse_double_complex* alpha, const rocsparse_mat_descr descr, const rocsparse_double_complex* bsr_val_A, const rocsparse_int* bsr_row_ptr_A, const rocsparse_int* bsr_col_ind_A, rocsparse_int block_dim, const rocsparse_double_complex* B, rocsparse_int ldb, const rocsparse_double_complex* beta, rocsparse_double_complex* C, rocsparse_int ldc); template void host_bsrsm(rocsparse_int mb, rocsparse_int nrhs, rocsparse_int nnzb, rocsparse_direction dir, rocsparse_operation transA, rocsparse_operation transX, rocsparse_double_complex alpha, const rocsparse_int* bsr_row_ptr, const rocsparse_int* bsr_col_ind, const rocsparse_double_complex* bsr_val, rocsparse_int bsr_dim, const rocsparse_double_complex* B, rocsparse_int ldb, rocsparse_double_complex* X, rocsparse_int ldx, rocsparse_diag_type diag_type, rocsparse_fill_mode fill_mode, rocsparse_index_base base, rocsparse_int* struct_pivot, rocsparse_int* numeric_pivot); template void host_gemmi(rocsparse_int M, rocsparse_int N, rocsparse_operation transA, rocsparse_operation transB, rocsparse_double_complex alpha, const rocsparse_double_complex* A, rocsparse_int lda, const rocsparse_int* csr_row_ptr, const rocsparse_int* csr_col_ind, const rocsparse_double_complex* csr_val, rocsparse_double_complex beta, rocsparse_double_complex* C, rocsparse_int ldc, rocsparse_index_base base); /* * =========================================================================== * extra SPARSE * 
=========================================================================== */ template void host_bsrgeam_nnzb(rocsparse_direction dir, rocsparse_int Mb, rocsparse_int Nb, rocsparse_int block_dim, rocsparse_double_complex alpha, const rocsparse_int* bsr_row_ptr_A, const rocsparse_int* bsr_col_ind_A, rocsparse_double_complex beta, const rocsparse_int* bsr_row_ptr_B, const rocsparse_int* bsr_col_ind_B, rocsparse_int* bsr_row_ptr_C, rocsparse_int* nnzb_C, rocsparse_index_base base_A, rocsparse_index_base base_B, rocsparse_index_base base_C); template void host_bsrgeam(rocsparse_direction dir, rocsparse_int Mb, rocsparse_int Nb, rocsparse_int block_dim, rocsparse_double_complex alpha, const rocsparse_int* bsr_row_ptr_A, const rocsparse_int* bsr_col_ind_A, const rocsparse_double_complex* bsr_val_A, rocsparse_double_complex beta, const rocsparse_int* bsr_row_ptr_B, const rocsparse_int* bsr_col_ind_B, const rocsparse_double_complex* bsr_val_B, const rocsparse_int* bsr_row_ptr_C, rocsparse_int* bsr_col_ind_C, rocsparse_double_complex* bsr_val_C, rocsparse_index_base base_A, rocsparse_index_base base_B, rocsparse_index_base base_C); template void host_csrgeam_nnz(rocsparse_int M, rocsparse_int N, rocsparse_double_complex alpha, const rocsparse_int* csr_row_ptr_A, const rocsparse_int* csr_col_ind_A, rocsparse_double_complex beta, const rocsparse_int* csr_row_ptr_B, const rocsparse_int* csr_col_ind_B, rocsparse_int* csr_row_ptr_C, rocsparse_int* nnz_C, rocsparse_index_base base_A, rocsparse_index_base base_B, rocsparse_index_base base_C); template void host_csrgeam(rocsparse_int M, rocsparse_int N, rocsparse_double_complex alpha, const rocsparse_int* csr_row_ptr_A, const rocsparse_int* csr_col_ind_A, const rocsparse_double_complex* csr_val_A, rocsparse_double_complex beta, const rocsparse_int* csr_row_ptr_B, const rocsparse_int* csr_col_ind_B, const rocsparse_double_complex* csr_val_B, const rocsparse_int* csr_row_ptr_C, rocsparse_int* csr_col_ind_C, rocsparse_double_complex* 
csr_val_C, rocsparse_index_base base_A, rocsparse_index_base base_B, rocsparse_index_base base_C); /* * =========================================================================== * precond SPARSE * =========================================================================== */ template void host_bsric0(rocsparse_direction direction, rocsparse_int Mb, rocsparse_int block_dim, const std::vector& bsr_row_ptr, const std::vector& bsr_col_ind, std::vector& bsr_val, rocsparse_index_base base, rocsparse_int* struct_pivot, rocsparse_int* numeric_pivot); template void host_bsrilu0(rocsparse_direction dir, rocsparse_int mb, const std::vector& bsr_row_ptr, const std::vector& bsr_col_ind, std::vector& bsr_val, rocsparse_int bsr_dim, rocsparse_index_base base, rocsparse_int* struct_pivot, rocsparse_int* numeric_pivot, bool boost, double boost_tol, rocsparse_double_complex boost_val); template void host_csric0(rocsparse_int M, const std::vector& csr_row_ptr, const std::vector& csr_col_ind, std::vector& csr_val, rocsparse_index_base base, rocsparse_int* struct_pivot, rocsparse_int* numeric_pivot); template void host_csrilu0(rocsparse_int M, const std::vector& csr_row_ptr, const std::vector& csr_col_ind, std::vector& csr_val, rocsparse_index_base base, rocsparse_int* struct_pivot, rocsparse_int* numeric_pivot, bool boost, double boost_tol, rocsparse_double_complex boost_val); template void host_gtsv_no_pivot(rocsparse_int m, rocsparse_int n, const std::vector& dl, const std::vector& d, const std::vector& du, std::vector& B, rocsparse_int ldb); template void host_gtsv_no_pivot_strided_batch(rocsparse_int m, const std::vector& dl, const std::vector& d, const std::vector& du, std::vector& x, rocsparse_int batch_count, rocsparse_int batch_stride); template void host_gtsv_interleaved_batch(rocsparse_gtsv_interleaved_alg algo, rocsparse_int m, const rocsparse_double_complex* dl, const rocsparse_double_complex* d, const rocsparse_double_complex* du, rocsparse_double_complex* x, 
rocsparse_int batch_count, rocsparse_int batch_stride); template void host_gpsv_interleaved_batch(rocsparse_gpsv_interleaved_alg algo, rocsparse_int m, rocsparse_double_complex* ds, rocsparse_double_complex* dl, rocsparse_double_complex* d, rocsparse_double_complex* du, rocsparse_double_complex* dw, rocsparse_double_complex* x, rocsparse_int batch_count, rocsparse_int batch_stride); /* * =========================================================================== * conversion SPARSE * =========================================================================== */ template rocsparse_status host_nnz(rocsparse_direction dirA, rocsparse_int m, rocsparse_int n, const rocsparse_double_complex* A, rocsparse_int lda, rocsparse_int* nnz_per_row_columns, rocsparse_int* nnz_total_dev_host_ptr); template void host_csr_to_gebsr(rocsparse_direction direction, rocsparse_int m, rocsparse_int n, rocsparse_int nnz, const std::vector& csr_val, const std::vector& csr_row_ptr, const std::vector& csr_col_ind, rocsparse_int row_block_dim, rocsparse_int col_block_dim, rocsparse_index_base csr_base, std::vector& bsr_val, std::vector& bsr_row_ptr, std::vector& bsr_col_ind, rocsparse_index_base bsr_base); template void host_gebsr_to_gebsc(rocsparse_int Mb, rocsparse_int Nb, rocsparse_int nnzb, const std::vector& bsr_row_ptr, const std::vector& bsr_col_ind, const std::vector& bsr_val, rocsparse_int row_block_dim, rocsparse_int col_block_dim, std::vector& bsc_row_ind, std::vector& bsc_col_ptr, std::vector& bsc_val, rocsparse_action action, rocsparse_index_base base); template void host_gebsr_to_csr(rocsparse_direction direction, rocsparse_int mb, rocsparse_int nb, rocsparse_int nnzb, const std::vector& bsr_val, const std::vector& bsr_row_ptr, const std::vector& bsr_col_ind, rocsparse_int row_block_dim, rocsparse_int col_block_dim, rocsparse_index_base bsr_base, std::vector& csr_val, std::vector& csr_row_ptr, std::vector& csr_col_ind, rocsparse_index_base csr_base); template void 
host_gebsr_to_gebsr(rocsparse_direction direction, rocsparse_int mb, rocsparse_int nb, rocsparse_int nnzb, const std::vector& bsr_val_A, const std::vector& bsr_row_ptr_A, const std::vector& bsr_col_ind_A, rocsparse_int row_block_dim_A, rocsparse_int col_block_dim_A, rocsparse_index_base base_A, std::vector& bsr_val_C, std::vector& bsr_row_ptr_C, std::vector& bsr_col_ind_C, rocsparse_int row_block_dim_C, rocsparse_int col_block_dim_C, rocsparse_index_base base_C); template void host_bsr_to_bsc(rocsparse_int mb, rocsparse_int nb, rocsparse_int nnzb, rocsparse_int bsr_dim, const rocsparse_int* bsr_row_ptr, const rocsparse_int* bsr_col_ind, const rocsparse_double_complex* bsr_val, std::vector& bsc_row_ind, std::vector& bsc_col_ptr, std::vector& bsc_val, rocsparse_index_base bsr_base, rocsparse_index_base bsc_base); template void host_csr_to_hyb(rocsparse_int M, rocsparse_int nnz, const std::vector& csr_row_ptr, const std::vector& csr_col_ind, const std::vector& csr_val, std::vector& ell_col_ind, std::vector& ell_val, rocsparse_int& ell_width, rocsparse_int& ell_nnz, std::vector& coo_row_ind, std::vector& coo_col_ind, std::vector& coo_val, rocsparse_int& coo_nnz, rocsparse_hyb_partition part, rocsparse_index_base base); template void host_csr_to_csr_compress(rocsparse_int M, rocsparse_int N, rocsparse_int nnz, const std::vector& csr_row_ptr_A, const std::vector& csr_col_ind_A, const std::vector& csr_val_A, std::vector& csr_row_ptr_C, std::vector& csr_col_ind_C, std::vector& csr_val_C, rocsparse_index_base base, rocsparse_double_complex tol); template void host_ell_to_csr(rocsparse_int M, rocsparse_int N, const std::vector& ell_col_ind, const std::vector& ell_val, rocsparse_int ell_width, std::vector& csr_row_ptr, std::vector& csr_col_ind, std::vector& csr_val, rocsparse_int& csr_nnz, rocsparse_index_base ell_base, rocsparse_index_base csr_base); template void host_coosort_by_column(rocsparse_int M, rocsparse_int nnz, std::vector& coo_row_ind, std::vector& coo_col_ind, 
std::vector& coo_val); // ROCSPARSE_FLOAT_COMPLEX /* * =========================================================================== * level 1 SPARSE * =========================================================================== */ template void host_gthrz(rocsparse_int nnz, rocsparse_float_complex* y, rocsparse_float_complex* x_val, const rocsparse_int* x_ind, rocsparse_index_base base); /* * =========================================================================== * level 2 SPARSE * =========================================================================== */ template void host_bsrxmv(rocsparse_direction dir, rocsparse_operation trans, rocsparse_int size_of_mask, rocsparse_int mb, rocsparse_int nb, rocsparse_int nnzb, rocsparse_float_complex alpha, const rocsparse_int* bsr_mask_ptr, const rocsparse_int* bsr_row_ptr, const rocsparse_int* bsr_end_ptr, const rocsparse_int* bsr_col_ind, const rocsparse_float_complex* bsr_val, rocsparse_int bsr_dim, const rocsparse_float_complex* x, rocsparse_float_complex beta, rocsparse_float_complex* y, rocsparse_index_base base); template void host_bsrsv(rocsparse_operation trans, rocsparse_direction dir, rocsparse_int mb, rocsparse_int nnzb, rocsparse_float_complex alpha, const rocsparse_int* bsr_row_ptr, const rocsparse_int* bsr_col_ind, const rocsparse_float_complex* bsr_val, rocsparse_int bsr_dim, const rocsparse_float_complex* x, rocsparse_float_complex* y, rocsparse_diag_type diag_type, rocsparse_fill_mode fill_mode, rocsparse_index_base base, rocsparse_int* struct_pivot, rocsparse_int* numeric_pivot); template void host_hybmv(rocsparse_operation trans, rocsparse_int M, rocsparse_int N, rocsparse_float_complex alpha, rocsparse_int ell_nnz, const rocsparse_int* ell_col_ind, const rocsparse_float_complex* ell_val, rocsparse_int ell_width, rocsparse_int coo_nnz, const rocsparse_int* coo_row_ind, const rocsparse_int* coo_col_ind, const rocsparse_float_complex* coo_val, const rocsparse_float_complex* x, rocsparse_float_complex 
beta, rocsparse_float_complex* y, rocsparse_index_base base); template void host_gebsrmv(rocsparse_direction dir, rocsparse_operation trans, rocsparse_int mb, rocsparse_int nb, rocsparse_int nnzb, rocsparse_float_complex alpha, const rocsparse_int* bsr_row_ptr, const rocsparse_int* bsr_col_ind, const rocsparse_float_complex* bsr_val, rocsparse_int row_block_dim, rocsparse_int col_block_dim, const rocsparse_float_complex* x, rocsparse_float_complex beta, rocsparse_float_complex* y, rocsparse_index_base base); /* * =========================================================================== * level 3 SPARSE * =========================================================================== */ template void host_bsrmm(rocsparse_handle handle, rocsparse_direction dir, rocsparse_operation transA, rocsparse_operation transB, rocsparse_int Mb, rocsparse_int N, rocsparse_int Kb, rocsparse_int nnzb, const rocsparse_float_complex* alpha, const rocsparse_mat_descr descr, const rocsparse_float_complex* bsr_val_A, const rocsparse_int* bsr_row_ptr_A, const rocsparse_int* bsr_col_ind_A, rocsparse_int block_dim, const rocsparse_float_complex* B, rocsparse_int ldb, const rocsparse_float_complex* beta, rocsparse_float_complex* C, rocsparse_int ldc); template void host_bsrsm(rocsparse_int mb, rocsparse_int nrhs, rocsparse_int nnzb, rocsparse_direction dir, rocsparse_operation transA, rocsparse_operation transX, rocsparse_float_complex alpha, const rocsparse_int* bsr_row_ptr, const rocsparse_int* bsr_col_ind, const rocsparse_float_complex* bsr_val, rocsparse_int bsr_dim, const rocsparse_float_complex* B, rocsparse_int ldb, rocsparse_float_complex* X, rocsparse_int ldx, rocsparse_diag_type diag_type, rocsparse_fill_mode fill_mode, rocsparse_index_base base, rocsparse_int* struct_pivot, rocsparse_int* numeric_pivot); template void host_gemmi(rocsparse_int M, rocsparse_int N, rocsparse_operation transA, rocsparse_operation transB, rocsparse_float_complex alpha, const rocsparse_float_complex* A, 
rocsparse_int lda, const rocsparse_int* csr_row_ptr, const rocsparse_int* csr_col_ind, const rocsparse_float_complex* csr_val, rocsparse_float_complex beta, rocsparse_float_complex* C, rocsparse_int ldc, rocsparse_index_base base); /* * =========================================================================== * extra SPARSE * =========================================================================== */ template void host_bsrgeam_nnzb(rocsparse_direction dir, rocsparse_int Mb, rocsparse_int Nb, rocsparse_int block_dim, rocsparse_float_complex alpha, const rocsparse_int* bsr_row_ptr_A, const rocsparse_int* bsr_col_ind_A, rocsparse_float_complex beta, const rocsparse_int* bsr_row_ptr_B, const rocsparse_int* bsr_col_ind_B, rocsparse_int* bsr_row_ptr_C, rocsparse_int* nnzb_C, rocsparse_index_base base_A, rocsparse_index_base base_B, rocsparse_index_base base_C); template void host_bsrgeam(rocsparse_direction dir, rocsparse_int Mb, rocsparse_int Nb, rocsparse_int block_dim, rocsparse_float_complex alpha, const rocsparse_int* bsr_row_ptr_A, const rocsparse_int* bsr_col_ind_A, const rocsparse_float_complex* bsr_val_A, rocsparse_float_complex beta, const rocsparse_int* bsr_row_ptr_B, const rocsparse_int* bsr_col_ind_B, const rocsparse_float_complex* bsr_val_B, const rocsparse_int* bsr_row_ptr_C, rocsparse_int* bsr_col_ind_C, rocsparse_float_complex* bsr_val_C, rocsparse_index_base base_A, rocsparse_index_base base_B, rocsparse_index_base base_C); template void host_csrgeam_nnz(rocsparse_int M, rocsparse_int N, rocsparse_float_complex alpha, const rocsparse_int* csr_row_ptr_A, const rocsparse_int* csr_col_ind_A, rocsparse_float_complex beta, const rocsparse_int* csr_row_ptr_B, const rocsparse_int* csr_col_ind_B, rocsparse_int* csr_row_ptr_C, rocsparse_int* nnz_C, rocsparse_index_base base_A, rocsparse_index_base base_B, rocsparse_index_base base_C); template void host_csrgeam(rocsparse_int M, rocsparse_int N, rocsparse_float_complex alpha, const rocsparse_int* 
csr_row_ptr_A, const rocsparse_int* csr_col_ind_A, const rocsparse_float_complex* csr_val_A, rocsparse_float_complex beta, const rocsparse_int* csr_row_ptr_B, const rocsparse_int* csr_col_ind_B, const rocsparse_float_complex* csr_val_B, const rocsparse_int* csr_row_ptr_C, rocsparse_int* csr_col_ind_C, rocsparse_float_complex* csr_val_C, rocsparse_index_base base_A, rocsparse_index_base base_B, rocsparse_index_base base_C); /* * =========================================================================== * precond SPARSE * =========================================================================== */ template void host_bsric0(rocsparse_direction direction, rocsparse_int Mb, rocsparse_int block_dim, const std::vector& bsr_row_ptr, const std::vector& bsr_col_ind, std::vector& bsr_val, rocsparse_index_base base, rocsparse_int* struct_pivot, rocsparse_int* numeric_pivot); template void host_bsrilu0(rocsparse_direction dir, rocsparse_int mb, const std::vector& bsr_row_ptr, const std::vector& bsr_col_ind, std::vector& bsr_val, rocsparse_int bsr_dim, rocsparse_index_base base, rocsparse_int* struct_pivot, rocsparse_int* numeric_pivot, bool boost, float boost_tol, rocsparse_float_complex boost_val); template void host_csric0(rocsparse_int M, const std::vector& csr_row_ptr, const std::vector& csr_col_ind, std::vector& csr_val, rocsparse_index_base base, rocsparse_int* struct_pivot, rocsparse_int* numeric_pivot); template void host_csrilu0(rocsparse_int M, const std::vector& csr_row_ptr, const std::vector& csr_col_ind, std::vector& csr_val, rocsparse_index_base base, rocsparse_int* struct_pivot, rocsparse_int* numeric_pivot, bool boost, float boost_tol, rocsparse_float_complex boost_val); template void host_gtsv_no_pivot(rocsparse_int m, rocsparse_int n, const std::vector& dl, const std::vector& d, const std::vector& du, std::vector& B, rocsparse_int ldb); template void host_gtsv_no_pivot_strided_batch(rocsparse_int m, const std::vector& dl, const std::vector& d, const 
std::vector& du, std::vector& x, rocsparse_int batch_count, rocsparse_int batch_stride); template void host_gtsv_interleaved_batch(rocsparse_gtsv_interleaved_alg algo, rocsparse_int m, const rocsparse_float_complex* dl, const rocsparse_float_complex* d, const rocsparse_float_complex* du, rocsparse_float_complex* x, rocsparse_int batch_count, rocsparse_int batch_stride); template void host_gpsv_interleaved_batch(rocsparse_gpsv_interleaved_alg algo, rocsparse_int m, rocsparse_float_complex* ds, rocsparse_float_complex* dl, rocsparse_float_complex* d, rocsparse_float_complex* du, rocsparse_float_complex* dw, rocsparse_float_complex* x, rocsparse_int batch_count, rocsparse_int batch_stride); /* * =========================================================================== * conversion SPARSE * =========================================================================== */ template rocsparse_status host_nnz(rocsparse_direction dirA, rocsparse_int m, rocsparse_int n, const rocsparse_float_complex* A, rocsparse_int lda, rocsparse_int* nnz_per_row_columns, rocsparse_int* nnz_total_dev_host_ptr); template void host_csr_to_gebsr(rocsparse_direction direction, rocsparse_int m, rocsparse_int n, rocsparse_int nnz, const std::vector& csr_val, const std::vector& csr_row_ptr, const std::vector& csr_col_ind, rocsparse_int row_block_dim, rocsparse_int col_block_dim, rocsparse_index_base csr_base, std::vector& bsr_val, std::vector& bsr_row_ptr, std::vector& bsr_col_ind, rocsparse_index_base bsr_base); template void host_gebsr_to_gebsc(rocsparse_int Mb, rocsparse_int Nb, rocsparse_int nnzb, const std::vector& bsr_row_ptr, const std::vector& bsr_col_ind, const std::vector& bsr_val, rocsparse_int row_block_dim, rocsparse_int col_block_dim, std::vector& bsc_row_ind, std::vector& bsc_col_ptr, std::vector& bsc_val, rocsparse_action action, rocsparse_index_base base); template void host_gebsr_to_csr(rocsparse_direction direction, rocsparse_int mb, rocsparse_int nb, rocsparse_int nnzb, const 
std::vector& bsr_val, const std::vector& bsr_row_ptr, const std::vector& bsr_col_ind, rocsparse_int row_block_dim, rocsparse_int col_block_dim, rocsparse_index_base bsr_base, std::vector& csr_val, std::vector& csr_row_ptr, std::vector& csr_col_ind, rocsparse_index_base csr_base); template void host_gebsr_to_gebsr(rocsparse_direction direction, rocsparse_int mb, rocsparse_int nb, rocsparse_int nnzb, const std::vector& bsr_val_A, const std::vector& bsr_row_ptr_A, const std::vector& bsr_col_ind_A, rocsparse_int row_block_dim_A, rocsparse_int col_block_dim_A, rocsparse_index_base base_A, std::vector& bsr_val_C, std::vector& bsr_row_ptr_C, std::vector& bsr_col_ind_C, rocsparse_int row_block_dim_C, rocsparse_int col_block_dim_C, rocsparse_index_base base_C); template void host_bsr_to_bsc(rocsparse_int mb, rocsparse_int nb, rocsparse_int nnzb, rocsparse_int bsr_dim, const rocsparse_int* bsr_row_ptr, const rocsparse_int* bsr_col_ind, const rocsparse_float_complex* bsr_val, std::vector& bsc_row_ind, std::vector& bsc_col_ptr, std::vector& bsc_val, rocsparse_index_base bsr_base, rocsparse_index_base bsc_base); template void host_csr_to_hyb(rocsparse_int M, rocsparse_int nnz, const std::vector& csr_row_ptr, const std::vector& csr_col_ind, const std::vector& csr_val, std::vector& ell_col_ind, std::vector& ell_val, rocsparse_int& ell_width, rocsparse_int& ell_nnz, std::vector& coo_row_ind, std::vector& coo_col_ind, std::vector& coo_val, rocsparse_int& coo_nnz, rocsparse_hyb_partition part, rocsparse_index_base base); template void host_csr_to_csr_compress(rocsparse_int M, rocsparse_int N, rocsparse_int nnz, const std::vector& csr_row_ptr_A, const std::vector& csr_col_ind_A, const std::vector& csr_val_A, std::vector& csr_row_ptr_C, std::vector& csr_col_ind_C, std::vector& csr_val_C, rocsparse_index_base base, rocsparse_float_complex tol); template void host_ell_to_csr(rocsparse_int M, rocsparse_int N, const std::vector& ell_col_ind, const std::vector& ell_val, rocsparse_int 
ell_width, std::vector& csr_row_ptr, std::vector& csr_col_ind, std::vector& csr_val, rocsparse_int& csr_nnz, rocsparse_index_base ell_base, rocsparse_index_base csr_base); template void host_coosort_by_column(rocsparse_int M, rocsparse_int nnz, std::vector& coo_row_ind, std::vector& coo_col_ind, std::vector& coo_val); #define INSTANTIATE1(TYPE) \ template void host_bsr_to_csr(rocsparse_direction direction, \ rocsparse_int mb, \ rocsparse_int nb, \ rocsparse_int nnzb, \ const std::vector& bsr_val, \ const std::vector& bsr_row_ptr, \ const std::vector& bsr_col_ind, \ rocsparse_int block_dim, \ rocsparse_index_base bsr_base, \ std::vector& csr_val, \ std::vector& csr_row_ptr, \ std::vector& csr_col_ind, \ rocsparse_index_base csr_base); \ template void host_csr_to_bsr(rocsparse_direction direction, \ rocsparse_int m, \ rocsparse_int n, \ rocsparse_int nnz, \ const std::vector& csr_val, \ const std::vector& csr_row_ptr, \ const std::vector& csr_col_ind, \ rocsparse_int block_dim, \ rocsparse_index_base csr_base, \ std::vector& bsr_val, \ std::vector& bsr_row_ptr, \ std::vector& bsr_col_ind, \ rocsparse_index_base bsr_base); \ template void host_bsrpad_value(rocsparse_int m, \ rocsparse_int mb, \ rocsparse_int nnzb, \ rocsparse_int block_dim, \ TYPE value, \ TYPE * bsr_val, \ const rocsparse_int* bsr_row_ptr, \ const rocsparse_int* bsr_col_ind, \ rocsparse_index_base bsr_base); #define INSTANTIATE2(ITYPE, TTYPE) \ template void host_gemvi(ITYPE M, \ ITYPE N, \ TTYPE alpha, \ const TTYPE* A, \ ITYPE lda, \ ITYPE nnz, \ const TTYPE* x_val, \ const ITYPE* x_ind, \ TTYPE beta, \ TTYPE* y, \ rocsparse_index_base base); \ template void host_coo_to_dense(ITYPE m, \ ITYPE n, \ ITYPE nnz, \ rocsparse_index_base base, \ const std::vector& coo_val, \ const std::vector& coo_row_ind, \ const std::vector& coo_col_ind, \ std::vector& A, \ ITYPE ld, \ rocsparse_order order); \ template void host_dense_to_coo(ITYPE m, \ ITYPE n, \ rocsparse_index_base base, \ const std::vector& A, \ 
ITYPE ld, \ rocsparse_order order, \ const std::vector& nnz_per_row, \ std::vector& coo_val, \ std::vector& coo_row_ind, \ std::vector& coo_col_ind); \ template void host_coosv(rocsparse_operation trans, \ ITYPE M, \ int64_t nnz, \ TTYPE alpha, \ const std::vector& coo_row_ind, \ const std::vector& coo_col_ind, \ const std::vector& coo_val, \ const std::vector& x, \ std::vector& y, \ rocsparse_diag_type diag_type, \ rocsparse_fill_mode fill_mode, \ rocsparse_index_base base, \ ITYPE* struct_pivot, \ ITYPE* numeric_pivot); \ template void host_coomm(ITYPE M, \ ITYPE N, \ ITYPE K, \ int64_t NNZ, \ rocsparse_operation transA, \ rocsparse_operation transB, \ TTYPE alpha, \ const ITYPE* coo_row_ind_A, \ const ITYPE* coo_col_ind_A, \ const TTYPE* coo_val_A, \ const TTYPE* B, \ ITYPE ldb, \ TTYPE beta, \ TTYPE* C, \ ITYPE ldc, \ rocsparse_order order, \ rocsparse_index_base base); \ template void host_coomm_batched(ITYPE M, \ ITYPE N, \ ITYPE K, \ int64_t NNZ, \ ITYPE batch_count_A, \ int64_t batch_stride_A, \ rocsparse_operation transA, \ rocsparse_operation transB, \ TTYPE alpha, \ const ITYPE* coo_row_ind_A, \ const ITYPE* coo_col_ind_A, \ const TTYPE* coo_val_A, \ const TTYPE* B, \ ITYPE ldb, \ ITYPE batch_count_B, \ int64_t batch_stride_B, \ TTYPE beta, \ TTYPE* C, \ ITYPE ldc, \ ITYPE batch_count_C, \ int64_t batch_stride_C, \ rocsparse_order order, \ rocsparse_index_base base); \ template void host_coosm(ITYPE M, \ ITYPE nrhs, \ int64_t nnz, \ rocsparse_operation transA, \ rocsparse_operation transB, \ TTYPE alpha, \ const ITYPE* coo_row_ind, \ const ITYPE* coo_col_ind, \ const TTYPE* coo_val, \ TTYPE* B, \ ITYPE ldb, \ rocsparse_diag_type diag_type, \ rocsparse_fill_mode fill_mode, \ rocsparse_index_base base, \ ITYPE* struct_pivot, \ ITYPE* numeric_pivot); \ template void host_axpby(ITYPE size, \ ITYPE nnz, \ TTYPE alpha, \ const TTYPE* x_val, \ const ITYPE* x_ind, \ TTYPE beta, \ TTYPE* y, \ rocsparse_index_base base); \ template void host_gthr( \ ITYPE nnz, 
const TTYPE* y, TTYPE* x_val, const ITYPE* x_ind, rocsparse_index_base base); \ template void host_roti(ITYPE nnz, \ TTYPE * x_val, \ const ITYPE* x_ind, \ TTYPE* y, \ const TTYPE* c, \ const TTYPE* s, \ rocsparse_index_base base); \ template void host_doti(ITYPE nnz, \ const TTYPE* x_val, \ const ITYPE* x_ind, \ const TTYPE* y, \ TTYPE* result, \ rocsparse_index_base base); \ template void host_dotci(ITYPE nnz, \ const TTYPE* x_val, \ const ITYPE* x_ind, \ const TTYPE* y, \ TTYPE* result, \ rocsparse_index_base base); \ template void host_sctr( \ ITYPE nnz, const TTYPE* x_val, const ITYPE* x_ind, TTYPE* y, rocsparse_index_base base) #define INSTANTIATE3(ITYPE, JTYPE, TTYPE) \ template void host_csr_to_csc(JTYPE M, \ JTYPE N, \ ITYPE nnz, \ const ITYPE* csr_row_ptr, \ const JTYPE* csr_col_ind, \ const TTYPE* csr_val, \ std::vector& csc_row_ind, \ std::vector& csc_col_ptr, \ std::vector& csc_val, \ rocsparse_action action, \ rocsparse_index_base base); \ template void host_csrsv(rocsparse_operation trans, \ JTYPE M, \ ITYPE nnz, \ TTYPE alpha, \ const ITYPE* csr_row_ptr, \ const JTYPE* csr_col_ind, \ const TTYPE* csr_val, \ const TTYPE* x, \ TTYPE* y, \ rocsparse_diag_type diag_type, \ rocsparse_fill_mode fill_mode, \ rocsparse_index_base base, \ JTYPE* struct_pivot, \ JTYPE* numeric_pivot); \ template void host_csrmm(JTYPE M, \ JTYPE N, \ JTYPE K, \ rocsparse_operation transA, \ rocsparse_operation transB, \ TTYPE alpha, \ const ITYPE* csr_row_ptr_A, \ const JTYPE* csr_col_ind_A, \ const TTYPE* csr_val_A, \ const TTYPE* B, \ JTYPE ldb, \ TTYPE beta, \ TTYPE* C, \ JTYPE ldc, \ rocsparse_order order, \ rocsparse_index_base base, \ bool force_conj_A); \ template void host_csrmm_batched(JTYPE M, \ JTYPE N, \ JTYPE K, \ JTYPE batch_count_A, \ ITYPE offsets_batch_stride_A, \ ITYPE columns_values_batch_stride_A, \ rocsparse_operation transA, \ rocsparse_operation transB, \ TTYPE alpha, \ const ITYPE* csr_row_ptr_A, \ const JTYPE* csr_col_ind_A, \ const TTYPE* csr_val_A, \ 
const TTYPE* B, \ JTYPE ldb, \ JTYPE batch_count_B, \ ITYPE batch_stride_B, \ TTYPE beta, \ TTYPE* C, \ JTYPE ldc, \ JTYPE batch_count_C, \ ITYPE batch_stride_C, \ rocsparse_order order, \ rocsparse_index_base base, \ bool force_conj_A); \ template void host_cscmm(JTYPE M, \ JTYPE N, \ JTYPE K, \ rocsparse_operation transA, \ rocsparse_operation transB, \ TTYPE alpha, \ const ITYPE* csc_col_ptr_A, \ const JTYPE* csc_row_ind_A, \ const TTYPE* csc_val_A, \ const TTYPE* B, \ JTYPE ldb, \ TTYPE beta, \ TTYPE* C, \ JTYPE ldc, \ rocsparse_order order, \ rocsparse_index_base base); \ template void host_cscmm_batched(JTYPE M, \ JTYPE N, \ JTYPE K, \ JTYPE batch_count_A, \ ITYPE offsets_batch_stride_A, \ ITYPE rows_values_batch_stride_A, \ rocsparse_operation transA, \ rocsparse_operation transB, \ TTYPE alpha, \ const ITYPE* csc_col_ptr_A, \ const JTYPE* csc_row_ind_A, \ const TTYPE* csc_val_A, \ const TTYPE* B, \ JTYPE ldb, \ JTYPE batch_count_B, \ ITYPE batch_stride_B, \ TTYPE beta, \ TTYPE* C, \ JTYPE ldc, \ JTYPE batch_count_C, \ ITYPE batch_stride_C, \ rocsparse_order order, \ rocsparse_index_base base); \ template void host_csrsm(JTYPE M, \ JTYPE nrhs, \ ITYPE nnz, \ rocsparse_operation transA, \ rocsparse_operation transB, \ TTYPE alpha, \ const ITYPE* csr_row_ptr, \ const JTYPE* csr_col_ind, \ const TTYPE* csr_val, \ TTYPE* B, \ JTYPE ldb, \ rocsparse_diag_type diag_type, \ rocsparse_fill_mode fill_mode, \ rocsparse_index_base base, \ JTYPE* struct_pivot, \ JTYPE* numeric_pivot); \ template void host_bsrgemm_nnzb(JTYPE Mb, \ JTYPE Nb, \ JTYPE Kb, \ JTYPE block_dim, \ const TTYPE* alpha, \ const ITYPE* bsr_row_ptr_A, \ const JTYPE* bsr_col_ind_A, \ const ITYPE* bsr_row_ptr_B, \ const JTYPE* bsr_col_ind_B, \ const TTYPE* beta, \ const ITYPE* bsr_row_ptr_D, \ const JTYPE* bsr_col_ind_D, \ ITYPE* bsr_row_ptr_C, \ ITYPE* nnzb_C, \ rocsparse_index_base base_A, \ rocsparse_index_base base_B, \ rocsparse_index_base base_C, \ rocsparse_index_base base_D); \ template void 
host_bsrgemm(rocsparse_direction dir, \ JTYPE Mb, \ JTYPE Nb, \ JTYPE Kb, \ JTYPE block_dim, \ const TTYPE* alpha, \ const ITYPE* bsr_row_ptr_A, \ const JTYPE* bsr_col_ind_A, \ const TTYPE* bsr_val_A, \ const ITYPE* bsr_row_ptr_B, \ const JTYPE* bsr_col_ind_B, \ const TTYPE* bsr_val_B, \ const TTYPE* beta, \ const ITYPE* bsr_row_ptr_D, \ const JTYPE* bsr_col_ind_D, \ const TTYPE* bsr_val_D, \ const ITYPE* bsr_row_ptr_C, \ JTYPE* bsr_col_ind_C, \ TTYPE* bsr_val_C, \ rocsparse_index_base base_A, \ rocsparse_index_base base_B, \ rocsparse_index_base base_C, \ rocsparse_index_base base_D); \ template void host_csrgemm_nnz(JTYPE M, \ JTYPE N, \ JTYPE K, \ const TTYPE* alpha, \ const ITYPE* csr_row_ptr_A, \ const JTYPE* csr_col_ind_A, \ const ITYPE* csr_row_ptr_B, \ const JTYPE* csr_col_ind_B, \ const TTYPE* beta, \ const ITYPE* csr_row_ptr_D, \ const JTYPE* csr_col_ind_D, \ ITYPE* csr_row_ptr_C, \ ITYPE* nnz_C, \ rocsparse_index_base base_A, \ rocsparse_index_base base_B, \ rocsparse_index_base base_C, \ rocsparse_index_base base_D); \ template void host_csrgemm(JTYPE M, \ JTYPE N, \ JTYPE L, \ const TTYPE* alpha, \ const ITYPE* csr_row_ptr_A, \ const JTYPE* csr_col_ind_A, \ const TTYPE* csr_val_A, \ const ITYPE* csr_row_ptr_B, \ const JTYPE* csr_col_ind_B, \ const TTYPE* csr_val_B, \ const TTYPE* beta, \ const ITYPE* csr_row_ptr_D, \ const JTYPE* csr_col_ind_D, \ const TTYPE* csr_val_D, \ const ITYPE* csr_row_ptr_C, \ JTYPE* csr_col_ind_C, \ TTYPE* csr_val_C, \ rocsparse_index_base base_A, \ rocsparse_index_base base_B, \ rocsparse_index_base base_C, \ rocsparse_index_base base_D); #define INSTANTIATE4(DIR, ITYPE, JTYPE, TTYPE) \ template void host_dense2csx(JTYPE m, \ JTYPE n, \ rocsparse_index_base base, \ const TTYPE* A, \ ITYPE ld, \ rocsparse_order order, \ const ITYPE* nnz_per_row_columns, \ TTYPE* csx_val, \ ITYPE* csx_row_col_ptr, \ JTYPE* csx_col_row_ind); \ template void host_csx2dense(JTYPE m, \ JTYPE n, \ rocsparse_index_base base, \ rocsparse_order order, 
\ const TTYPE* csx_val, \ const ITYPE* csx_row_col_ptr, \ const JTYPE* csx_col_row_ind, \ TTYPE* A, \ ITYPE ld); #define INSTANTIATE_IJAXYT(ITYPE, JTYPE, ATYPE, XTYPE, YTYPE, TTYPE) \ template void host_bsrmv(rocsparse_direction dir, \ rocsparse_operation trans, \ JTYPE mb, \ JTYPE nb, \ ITYPE nnzb, \ TTYPE alpha, \ const ITYPE* bsr_row_ptr, \ const JTYPE* bsr_col_ind, \ const ATYPE* bsr_val, \ JTYPE bsr_dim, \ const XTYPE* x, \ TTYPE beta, \ YTYPE* y, \ rocsparse_index_base base); \ template void host_cscmv(rocsparse_operation trans, \ JTYPE M, \ JTYPE N, \ ITYPE nnz, \ TTYPE alpha, \ const ITYPE* csc_col_ptr, \ const JTYPE* csc_row_ind, \ const ATYPE* csc_val, \ const XTYPE* x, \ TTYPE beta, \ YTYPE* y, \ rocsparse_index_base base, \ rocsparse_matrix_type matrix_type, \ rocsparse_spmv_alg algo); \ template void host_csrmv(rocsparse_operation trans, \ JTYPE M, \ JTYPE N, \ ITYPE nnz, \ TTYPE alpha, \ const ITYPE* csr_row_ptr, \ const JTYPE* csr_col_ind, \ const ATYPE* csr_val, \ const XTYPE* x, \ TTYPE beta, \ YTYPE* y, \ rocsparse_index_base base, \ rocsparse_matrix_type matrix_type, \ rocsparse_spmv_alg algo, \ bool force_conj) #define INSTANTIATE6(ITYPE, ATYPE, XTYPE, YTYPE, TTYPE) \ template void host_coomv(rocsparse_operation trans, \ ITYPE M, \ ITYPE N, \ int64_t nnz, \ TTYPE alpha, \ const ITYPE* coo_row_ind, \ const ITYPE* coo_col_ind, \ const ATYPE* coo_val, \ const XTYPE* x, \ TTYPE beta, \ YTYPE* y, \ rocsparse_index_base base); \ template void host_coomv_aos(rocsparse_operation trans, \ ITYPE M, \ ITYPE N, \ int64_t nnz, \ TTYPE alpha, \ const ITYPE* coo_ind, \ const ATYPE* coo_val, \ const XTYPE* x, \ TTYPE beta, \ YTYPE* y, \ rocsparse_index_base base); \ template void host_ellmv(rocsparse_operation trans, \ ITYPE M, \ ITYPE N, \ TTYPE alpha, \ const ITYPE* ell_col_ind, \ const ATYPE* ell_val, \ ITYPE ell_width, \ const XTYPE* x, \ TTYPE beta, \ YTYPE* y, \ rocsparse_index_base base); INSTANTIATE1(float); INSTANTIATE1(double); 
INSTANTIATE1(rocsparse_float_complex); INSTANTIATE1(rocsparse_double_complex); INSTANTIATE2(int32_t, float); INSTANTIATE2(int32_t, double); INSTANTIATE2(int32_t, rocsparse_float_complex); INSTANTIATE2(int32_t, rocsparse_double_complex); INSTANTIATE2(int64_t, float); INSTANTIATE2(int64_t, double); INSTANTIATE2(int64_t, rocsparse_float_complex); INSTANTIATE2(int64_t, rocsparse_double_complex); INSTANTIATE3(int32_t, int32_t, float); INSTANTIATE3(int32_t, int32_t, double); INSTANTIATE3(int32_t, int32_t, rocsparse_float_complex); INSTANTIATE3(int32_t, int32_t, rocsparse_double_complex); INSTANTIATE3(int64_t, int32_t, float); INSTANTIATE3(int64_t, int32_t, double); INSTANTIATE3(int64_t, int32_t, rocsparse_float_complex); INSTANTIATE3(int64_t, int32_t, rocsparse_double_complex); INSTANTIATE3(int64_t, int64_t, float); INSTANTIATE3(int64_t, int64_t, double); INSTANTIATE3(int64_t, int64_t, rocsparse_float_complex); INSTANTIATE3(int64_t, int64_t, rocsparse_double_complex); INSTANTIATE4(rocsparse_direction_row, int32_t, int32_t, float); INSTANTIATE4(rocsparse_direction_row, int32_t, int32_t, double); INSTANTIATE4(rocsparse_direction_row, int32_t, int32_t, rocsparse_float_complex); INSTANTIATE4(rocsparse_direction_row, int32_t, int32_t, rocsparse_double_complex); INSTANTIATE4(rocsparse_direction_row, int64_t, int32_t, float); INSTANTIATE4(rocsparse_direction_row, int64_t, int32_t, double); INSTANTIATE4(rocsparse_direction_row, int64_t, int32_t, rocsparse_float_complex); INSTANTIATE4(rocsparse_direction_row, int64_t, int32_t, rocsparse_double_complex); INSTANTIATE4(rocsparse_direction_row, int64_t, int64_t, float); INSTANTIATE4(rocsparse_direction_row, int64_t, int64_t, double); INSTANTIATE4(rocsparse_direction_row, int64_t, int64_t, rocsparse_float_complex); INSTANTIATE4(rocsparse_direction_row, int64_t, int64_t, rocsparse_double_complex); INSTANTIATE4(rocsparse_direction_column, int32_t, int32_t, float); INSTANTIATE4(rocsparse_direction_column, int32_t, int32_t, double); 
INSTANTIATE4(rocsparse_direction_column, int32_t, int32_t, rocsparse_float_complex); INSTANTIATE4(rocsparse_direction_column, int32_t, int32_t, rocsparse_double_complex); INSTANTIATE4(rocsparse_direction_column, int64_t, int32_t, float); INSTANTIATE4(rocsparse_direction_column, int64_t, int32_t, double); INSTANTIATE4(rocsparse_direction_column, int64_t, int32_t, rocsparse_float_complex); INSTANTIATE4(rocsparse_direction_column, int64_t, int32_t, rocsparse_double_complex); INSTANTIATE4(rocsparse_direction_column, int64_t, int64_t, float); INSTANTIATE4(rocsparse_direction_column, int64_t, int64_t, double); INSTANTIATE4(rocsparse_direction_column, int64_t, int64_t, rocsparse_float_complex); INSTANTIATE4(rocsparse_direction_column, int64_t, int64_t, rocsparse_double_complex); INSTANTIATE6(int32_t, int8_t, int8_t, int32_t, int32_t); INSTANTIATE6(int64_t, int8_t, int8_t, int32_t, int32_t); INSTANTIATE6(int32_t, int8_t, int8_t, float, float); INSTANTIATE6(int64_t, int8_t, int8_t, float, float); INSTANTIATE6( int32_t, float, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex); INSTANTIATE_IJAXYT(int32_t, int32_t, int8_t, int8_t, int32_t, int32_t); INSTANTIATE_IJAXYT(int64_t, int32_t, int8_t, int8_t, int32_t, int32_t); INSTANTIATE_IJAXYT(int64_t, int64_t, int8_t, int8_t, int32_t, int32_t); INSTANTIATE_IJAXYT(int32_t, int32_t, int8_t, int8_t, float, float); INSTANTIATE_IJAXYT(int64_t, int32_t, int8_t, int8_t, float, float); INSTANTIATE_IJAXYT(int64_t, int64_t, int8_t, int8_t, float, float); INSTANTIATE_IJAXYT(int32_t, int32_t, float, double, double, double); INSTANTIATE_IJAXYT(int64_t, int32_t, float, double, double, double); INSTANTIATE_IJAXYT(int64_t, int64_t, float, double, double, double); INSTANTIATE_IJAXYT(int32_t, int32_t, float, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex); INSTANTIATE_IJAXYT(int64_t, int32_t, float, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex); 
INSTANTIATE_IJAXYT(int64_t, int64_t, float, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex); INSTANTIATE_IJAXYT(int32_t, int32_t, double, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex); INSTANTIATE_IJAXYT(int64_t, int32_t, double, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex); INSTANTIATE_IJAXYT(int64_t, int64_t, double, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex); INSTANTIATE_IJAXYT(int32_t, int32_t, rocsparse_float_complex, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex); INSTANTIATE_IJAXYT(int64_t, int32_t, rocsparse_float_complex, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex); INSTANTIATE_IJAXYT(int64_t, int64_t, rocsparse_float_complex, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex); INSTANTIATE_IJAXYT(int32_t, int32_t, float, float, float, float); INSTANTIATE_IJAXYT(int64_t, int32_t, float, float, float, float); INSTANTIATE_IJAXYT(int64_t, int64_t, float, float, float, float); INSTANTIATE_IJAXYT(int32_t, int32_t, double, double, double, double); INSTANTIATE_IJAXYT(int64_t, int32_t, double, double, double, double); INSTANTIATE_IJAXYT(int64_t, int64_t, double, double, double, double); INSTANTIATE_IJAXYT(int32_t, int32_t, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex); INSTANTIATE_IJAXYT(int64_t, int32_t, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex); INSTANTIATE_IJAXYT(int64_t, int64_t, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex); INSTANTIATE_IJAXYT(int32_t, int32_t, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex); INSTANTIATE_IJAXYT(int64_t, int32_t, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex, 
rocsparse_double_complex); INSTANTIATE_IJAXYT(int64_t, int64_t, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex); INSTANTIATE6( int64_t, float, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex); INSTANTIATE6( int32_t, double, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex); INSTANTIATE6( int64_t, double, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex); INSTANTIATE6(int32_t, float, float, float, float); INSTANTIATE6(int64_t, float, float, float, float); INSTANTIATE6(int32_t, double, double, double, double); INSTANTIATE6(int64_t, double, double, double, double); INSTANTIATE6(int32_t, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex); INSTANTIATE6(int64_t, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex); INSTANTIATE6(int32_t, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex); INSTANTIATE6(int64_t, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex); INSTANTIATE6(int32_t, rocsparse_float_complex, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex); INSTANTIATE6(int64_t, rocsparse_float_complex, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex); INSTANTIATE6(int32_t, float, double, double, double); INSTANTIATE6(int64_t, float, double, double, double); rocSPARSE-rocm-5.7.1/clients/common/rocsparse_importer.cpp000066400000000000000000000101301447342677400235570ustar00rootroot00000000000000/*! \file */ /* ************************************************************************ * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * * ************************************************************************ */ #include "rocsparse_importer.hpp" template <> rocsparse_status rocsparse_type_conversion(const size_t& x, size_t& y) { y = x; return rocsparse_status_success; } template <> rocsparse_status rocsparse_type_conversion(const int32_t& x, int32_t& y) { y = x; return rocsparse_status_success; } template <> rocsparse_status rocsparse_type_conversion(const int64_t& x, int64_t& y) { y = x; return rocsparse_status_success; } template <> rocsparse_status rocsparse_type_conversion(const int32_t& x, int64_t& y) { y = x; return rocsparse_status_success; } template <> rocsparse_status rocsparse_type_conversion(const int64_t& x, size_t& y) { if(x < 0) { std::cerr << "corrupted conversion from int64_t to size_t." 
<< std::endl; return rocsparse_status_invalid_value; } else { y = static_cast(x); return rocsparse_status_success; } } template <> rocsparse_status rocsparse_type_conversion(const int32_t& x, size_t& y) { if(x < 0) { std::cerr << "corrupted conversion from int32_t to size_t." << std::endl; return rocsparse_status_invalid_value; } else { y = static_cast(x); return rocsparse_status_success; } } template <> rocsparse_status rocsparse_type_conversion(const int64_t& x, int32_t& y) { static constexpr int32_t int32max = std::numeric_limits::max(); if(x > int32max) { std::cerr << "corrupted conversion from int64_t to int32_t." << std::endl; return rocsparse_status_invalid_value; } static constexpr int32_t int32min = std::numeric_limits::min(); if(x < int32min) { std::cerr << "corrupted conversion from int64_t to int32_t." << std::endl; return rocsparse_status_invalid_value; } y = static_cast(x); return rocsparse_status_success; } template <> rocsparse_status rocsparse_type_conversion(const size_t& x, int32_t& y) { static constexpr int32_t int32max = std::numeric_limits::max(); if(x > int32max) { std::cerr << "corrupted conversion from size_t to int32_t." << std::endl; return rocsparse_status_invalid_value; } y = static_cast(x); return rocsparse_status_success; } template <> rocsparse_status rocsparse_type_conversion(const size_t& x, int64_t& y) { static constexpr int64_t int64max = std::numeric_limits::max(); if(x > int64max) { std::cerr << "corrupted conversion from size_t to int64_t." << std::endl; return rocsparse_status_invalid_value; } y = static_cast(x); return rocsparse_status_success; } template <> rocsparse_status rocsparse_type_conversion(const float& x, double& y) { y = static_cast(x); return rocsparse_status_success; } rocSPARSE-rocm-5.7.1/clients/common/rocsparse_importer_impls.hpp000066400000000000000000000026761447342677400250100ustar00rootroot00000000000000/*! 
\file */ /* ************************************************************************ * Copyright (C) 2022 Advanced Micro Devices, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * * ************************************************************************ */ #pragma once #include "rocsparse_importer_matrixmarket.hpp" #include "rocsparse_importer_rocalution.hpp" #include "rocsparse_importer_rocsparseio.hpp" rocSPARSE-rocm-5.7.1/clients/common/rocsparse_importer_matrixmarket.cpp000066400000000000000000000307611447342677400263630ustar00rootroot00000000000000/*! \file */ /* ************************************************************************ * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * * ************************************************************************ */ #include "rocsparse_importer_matrixmarket.hpp" #include rocsparse_importer_matrixmarket::rocsparse_importer_matrixmarket(const std::string& filename_) : m_filename(filename_) { } /* ============================================================================================ */ /*! 
\brief Read matrix from mtx file in COO format */ static inline void read_mtx_value(std::istringstream& is, int64_t& row, int64_t& col, int8_t& val) { is >> row >> col >> val; } static inline void read_mtx_value(std::istringstream& is, int64_t& row, int64_t& col, float& val) { is >> row >> col >> val; } static inline void read_mtx_value(std::istringstream& is, int64_t& row, int64_t& col, double& val) { is >> row >> col >> val; } static inline void read_mtx_value(std::istringstream& is, int64_t& row, int64_t& col, rocsparse_float_complex& val) { float real{}; float imag{}; is >> row >> col >> real >> imag; val = {real, imag}; } static inline void read_mtx_value(std::istringstream& is, int64_t& row, int64_t& col, rocsparse_double_complex& val) { double real{}; double imag{}; is >> row >> col >> real >> imag; val = {real, imag}; } template rocsparse_status rocsparse_importer_matrixmarket::import_sparse_csx( rocsparse_direction* dir, J* m, J* n, I* nnz, rocsparse_index_base* base) { return rocsparse_status_not_implemented; } template rocsparse_status rocsparse_importer_matrixmarket::import_sparse_csx(I* ptr, J* ind, T* val) { return rocsparse_status_not_implemented; } template rocsparse_status rocsparse_importer_matrixmarket::import_sparse_gebsx(rocsparse_direction* dir, rocsparse_direction* dirb, J* mb, J* nb, I* nnzb, J* block_dim_row, J* block_dim_column, rocsparse_index_base* base) { return rocsparse_status_not_implemented; } template rocsparse_status rocsparse_importer_matrixmarket::import_sparse_gebsx(I* ptr, J* ind, T* val) { return rocsparse_status_not_implemented; } template rocsparse_status rocsparse_importer_matrixmarket::import_sparse_coo(I* m, I* n, int64_t* nnz, rocsparse_index_base* base) { char line[1024]; f = fopen(this->m_filename.c_str(), "r"); if(!f) { std::cerr << "rocsparse_importer_matrixmarket::import_sparse_coo: cannot open file '" << this->m_filename << "' " << std::endl; return rocsparse_status_internal_error; } // Check for banner 
if(!fgets(line, 1024, f)) { throw rocsparse_status_internal_error; } char banner[16]; char array[16]; char coord[16]; char type[16]; // Extract banner if(sscanf(line, "%15s %15s %15s %15s %15s", banner, array, coord, this->m_data, type) != 5) { throw rocsparse_status_internal_error; } // Convert to lower case for(char* p = array; *p != '\0'; *p = tolower(*p), p++) ; for(char* p = coord; *p != '\0'; *p = tolower(*p), p++) ; for(char* p = this->m_data; *p != '\0'; *p = tolower(*p), p++) ; for(char* p = type; *p != '\0'; *p = tolower(*p), p++) ; // Check banner if(strncmp(line, "%%MatrixMarket", 14) != 0) { throw rocsparse_status_internal_error; } // Check array type if(strcmp(array, "matrix") != 0) { throw rocsparse_status_internal_error; } // Check coord if(strcmp(coord, "coordinate") != 0) { throw rocsparse_status_internal_error; } // Check this->m_data if(strcmp(this->m_data, "real") != 0 && strcmp(this->m_data, "integer") != 0 && strcmp(this->m_data, "pattern") != 0 && strcmp(this->m_data, "complex") != 0) { throw rocsparse_status_internal_error; } // Check type if(strcmp(type, "general") != 0 && strcmp(type, "symmetric") != 0) { throw rocsparse_status_internal_error; } // Symmetric flag this->m_symm = !strcmp(type, "symmetric"); // Skip comments while(fgets(line, 1024, f)) { if(line[0] != '%') { break; } } // Read dimensions I snnz; int inrow; int incol; int innz; sscanf(line, "%d %d %d", &inrow, &incol, &innz); rocsparse_status status; status = rocsparse_type_conversion(inrow, m[0]); if(status != rocsparse_status_success) return status; status = rocsparse_type_conversion(incol, n[0]); if(status != rocsparse_status_success) return status; status = rocsparse_type_conversion(innz, snnz); if(status != rocsparse_status_success) return status; if(this->m_symm) { // // // We need to count how many diagonal elements are in the file. // // // // Record position. 
// fpos_t pos; if(0 != fgetpos(this->f, &pos)) { throw rocsparse_status_internal_error; } // // Count diagonal coefficients. // I num_diagonal_coefficients = 0; while(fgets(line, 1024, f)) { int32_t irow{}; int32_t icol{}; sscanf(line, "%d %d", &irow, &icol); if(irow == icol) { ++num_diagonal_coefficients; } } // // Set position. // if(0 != fsetpos(this->f, &pos)) { throw rocsparse_status_internal_error; } // // Now calculate the right number of coefficients. // snnz = (snnz - num_diagonal_coefficients) * 2 + num_diagonal_coefficients; } status = rocsparse_type_conversion(snnz, nnz[0]); if(status != rocsparse_status_success) return status; base[0] = rocsparse_index_base_one; this->m_nnz = snnz; return rocsparse_status_success; } template rocsparse_status rocsparse_importer_matrixmarket::import_sparse_coo(I* row_ind, I* col_ind, T* val) { char line[1024]; const size_t nnz = this->m_nnz; std::vector unsorted_row(nnz); std::vector unsorted_col(nnz); std::vector unsorted_val(nnz); // Read entries I idx = 0; while(fgets(line, 1024, f)) { if(idx >= nnz) { throw rocsparse_status_internal_error; } int64_t irow{}; int64_t icol{}; T ival; std::istringstream ss(line); if(!strcmp(this->m_data, "pattern")) { ss >> irow >> icol; ival = static_cast(1); } else { read_mtx_value(ss, irow, icol, ival); } unsorted_row[idx] = (I)irow; unsorted_col[idx] = (I)icol; unsorted_val[idx] = ival; ++idx; if(this->m_symm && irow != icol) { if(idx >= nnz) { throw rocsparse_status_internal_error; } unsorted_row[idx] = (I)icol; unsorted_col[idx] = (I)irow; unsorted_val[idx] = ival; ++idx; } } fclose(f); // Sort by row and column index std::vector perm(nnz); for(I i = 0; i < nnz; ++i) { perm[i] = i; } std::sort(perm.begin(), perm.end(), [&](const I& a, const I& b) { if(unsorted_row[a] < unsorted_row[b]) { return true; } else if(unsorted_row[a] == unsorted_row[b]) { return (unsorted_col[a] < unsorted_col[b]); } else { return false; } }); for(I i = 0; i < nnz; ++i) { row_ind[i] = 
unsorted_row[perm[i]]; } for(I i = 0; i < nnz; ++i) { col_ind[i] = unsorted_col[perm[i]]; } for(I i = 0; i < nnz; ++i) { val[i] = unsorted_val[perm[i]]; } return rocsparse_status_success; } #define INSTANTIATE_TIJ(T, I, J) \ template rocsparse_status rocsparse_importer_matrixmarket::import_sparse_csx(I*, J*, T*); \ template rocsparse_status rocsparse_importer_matrixmarket::import_sparse_gebsx(I*, J*, T*) #define INSTANTIATE_TI(T, I) \ template rocsparse_status rocsparse_importer_matrixmarket::import_sparse_coo( \ I* row_ind, I* col_ind, T* val) #define INSTANTIATE_I(I) \ template rocsparse_status rocsparse_importer_matrixmarket::import_sparse_coo( \ I* m, I* n, int64_t* nnz, rocsparse_index_base* base) #define INSTANTIATE_IJ(I, J) \ template rocsparse_status rocsparse_importer_matrixmarket::import_sparse_csx( \ rocsparse_direction*, J*, J*, I*, rocsparse_index_base*); \ template rocsparse_status rocsparse_importer_matrixmarket::import_sparse_gebsx( \ rocsparse_direction*, rocsparse_direction*, J*, J*, I*, J*, J*, rocsparse_index_base*) INSTANTIATE_I(int32_t); INSTANTIATE_I(int64_t); INSTANTIATE_IJ(int32_t, int32_t); INSTANTIATE_IJ(int64_t, int32_t); INSTANTIATE_IJ(int64_t, int64_t); INSTANTIATE_TIJ(int8_t, int32_t, int32_t); INSTANTIATE_TIJ(int8_t, int64_t, int32_t); INSTANTIATE_TIJ(int8_t, int64_t, int64_t); INSTANTIATE_TIJ(float, int32_t, int32_t); INSTANTIATE_TIJ(float, int64_t, int32_t); INSTANTIATE_TIJ(float, int64_t, int64_t); INSTANTIATE_TIJ(double, int32_t, int32_t); INSTANTIATE_TIJ(double, int64_t, int32_t); INSTANTIATE_TIJ(double, int64_t, int64_t); INSTANTIATE_TIJ(rocsparse_float_complex, int32_t, int32_t); INSTANTIATE_TIJ(rocsparse_float_complex, int64_t, int32_t); INSTANTIATE_TIJ(rocsparse_float_complex, int64_t, int64_t); INSTANTIATE_TIJ(rocsparse_double_complex, int32_t, int32_t); INSTANTIATE_TIJ(rocsparse_double_complex, int64_t, int32_t); INSTANTIATE_TIJ(rocsparse_double_complex, int64_t, int64_t); INSTANTIATE_TI(int8_t, int32_t); 
INSTANTIATE_TI(int8_t, int64_t); INSTANTIATE_TI(float, int32_t); INSTANTIATE_TI(float, int64_t); INSTANTIATE_TI(double, int32_t); INSTANTIATE_TI(double, int64_t); INSTANTIATE_TI(rocsparse_float_complex, int32_t); INSTANTIATE_TI(rocsparse_float_complex, int64_t); INSTANTIATE_TI(rocsparse_double_complex, int32_t); INSTANTIATE_TI(rocsparse_double_complex, int64_t); rocSPARSE-rocm-5.7.1/clients/common/rocsparse_importer_matrixmarket.hpp000066400000000000000000000061411447342677400263630ustar00rootroot00000000000000/*! \file */ /* ************************************************************************ * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
* * ************************************************************************ */ #pragma once #ifndef ROCSPARSE_IMPORTER_MATRIXMARKET_HPP #define ROCSPARSE_IMPORTER_MATRIXMARKET_HPP #include "rocsparse_importer.hpp" class rocsparse_importer_matrixmarket : public rocsparse_importer { protected: std::string m_filename; public: rocsparse_importer_matrixmarket(const std::string& filename_); private: FILE* f; size_t m_nnz; char m_data[16]; int m_symm; public: template rocsparse_status import_sparse_csx(rocsparse_direction* dir, J* m, J* n, I* nnz, rocsparse_index_base* base); template rocsparse_status import_sparse_csx(I* ptr, J* ind, T* val); template rocsparse_status import_sparse_gebsx(rocsparse_direction* dir, rocsparse_direction* dirb, J* mb, J* nb, I* nnzb, J* block_dim_row, J* block_dim_column, rocsparse_index_base* base); template rocsparse_status import_sparse_gebsx(I* ptr, J* ind, T* val); template rocsparse_status import_sparse_coo(I* m, I* n, int64_t* nnz, rocsparse_index_base* base); template rocsparse_status import_sparse_coo(I* row_ind, I* col_ind, T* val); }; #endif rocSPARSE-rocm-5.7.1/clients/common/rocsparse_importer_rocalution.cpp000066400000000000000000000274441447342677400260360ustar00rootroot00000000000000/*! \file */ /* ************************************************************************ * Copyright (C) 2021-2023 Advanced Micro Devices, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * * ************************************************************************ */ #include "rocsparse_importer_rocalution.hpp" static inline void read_csr_values(std::ifstream& in, int64_t nnz, int8_t* csr_val) { // Temporary array to convert from double to float std::vector tmp(nnz); // Read in double values in.read((char*)tmp.data(), sizeof(double) * nnz); #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(int64_t i = 0; i < nnz; ++i) { csr_val[i] = static_cast(tmp[i]); } } static inline void read_csr_values(std::ifstream& in, int64_t nnz, float* csr_val) { // Temporary array to convert from double to float std::vector tmp(nnz); // Read in double values in.read((char*)tmp.data(), sizeof(double) * nnz); #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(int64_t i = 0; i < nnz; ++i) { csr_val[i] = static_cast(tmp[i]); } } static inline void read_csr_values(std::ifstream& in, int64_t nnz, double* csr_val) { in.read((char*)csr_val, sizeof(double) * nnz); } static inline void read_csr_values(std::ifstream& in, int64_t nnz, rocsparse_float_complex* csr_val) { // Temporary array to convert from double to float complex std::vector tmp(nnz); // Read in double complex values in.read((char*)tmp.data(), sizeof(rocsparse_double_complex) * nnz); #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(int64_t i = 0; i < nnz; ++i) { csr_val[i] = rocsparse_float_complex(static_cast(std::real(tmp[i])), static_cast(std::imag(tmp[i]))); } } static 
inline void read_csr_values(std::ifstream& in, int64_t nnz, rocsparse_double_complex* csr_val) { in.read((char*)csr_val, sizeof(rocsparse_double_complex) * nnz); } rocsparse_importer_rocalution::rocsparse_importer_rocalution(const std::string& filename_) : m_filename(filename_) { } template rocsparse_status rocsparse_importer_rocalution::import_sparse_gebsx(rocsparse_direction* dir, rocsparse_direction* dirb, J* mb, J* nb, I* nnzb, J* block_dim_row, J* block_dim_column, rocsparse_index_base* base) { return rocsparse_status_not_implemented; } template rocsparse_status rocsparse_importer_rocalution::import_sparse_gebsx(I* ptr, J* ind, T* val) { return rocsparse_status_not_implemented; } template rocsparse_status rocsparse_importer_rocalution::import_sparse_coo(I* m, I* n, int64_t* nnz, rocsparse_index_base* base) { return rocsparse_status_not_implemented; } template rocsparse_status rocsparse_importer_rocalution::import_sparse_coo(I* row_ind, I* col_ind, T* val) { return rocsparse_status_not_implemented; } template rocsparse_status rocsparse_importer_rocalution::import_sparse_csx( rocsparse_direction* dir, J* m, J* n, I* nnz, rocsparse_index_base* base) { const char* env = getenv("GTEST_LISTENER"); if(!env || strcmp(env, "NO_PASS_LINE_IN_LOG")) { std::cout << "Opening file '" << this->m_filename << "' ... 
" << std::endl; } this->m_info_csx.in = new std::ifstream(this->m_filename, std::ios::in | std::ios::binary); if(!this->m_info_csx.in->is_open()) { std::cerr << "cannot open file '" << this->m_filename << "'" << std::endl; return rocsparse_status_internal_error; } std::string header; std::getline(this->m_info_csx.in[0], header); if(header != "#rocALUTION binary csr file") { return rocsparse_status_internal_error; } int version; this->m_info_csx.in->read((char*)&version, sizeof(int)); int iM; int iN; int innz; this->m_info_csx.in->read((char*)&iM, sizeof(int)); this->m_info_csx.in->read((char*)&iN, sizeof(int)); this->m_info_csx.in->read((char*)&innz, sizeof(int)); rocsparse_status status; status = rocsparse_type_conversion(iM, m[0]); if(status != rocsparse_status_success) return status; status = rocsparse_type_conversion(iN, n[0]); if(status != rocsparse_status_success) return status; status = rocsparse_type_conversion(innz, nnz[0]); if(status != rocsparse_status_success) return status; dir[0] = rocsparse_direction_row; base[0] = rocsparse_index_base_zero; this->m_info_csx.m = iM; this->m_info_csx.nnz = innz; return rocsparse_status_success; } template rocsparse_status rocsparse_importer_rocalution::import_sparse_csx(I* ptr, J* ind, T* val) { const size_t M = this->m_info_csx.m; const size_t nnz = this->m_info_csx.nnz; const bool same_ptr_type = std::is_same(); const bool same_ind_type = std::is_same(); const bool same_val_type = std::is_same() || std::is_same(); const bool is_consistent = same_ptr_type && same_ind_type && same_val_type; if(is_consistent) { this->m_info_csx.in->read((char*)ptr, sizeof(int) * (M + 1)); this->m_info_csx.in->read((char*)ind, sizeof(int) * nnz); this->m_info_csx.in->read((char*)val, sizeof(T) * nnz); this->m_info_csx.in->close(); delete this->m_info_csx.in; this->m_info_csx.in = nullptr; { const char* env = getenv("GTEST_LISTENER"); if(!env || strcmp(env, "NO_PASS_LINE_IN_LOG")) { std::cout << "Import done." 
<< std::endl; } } } else { void* tmp_ptr = (void*)ptr; void* tmp_ind = (void*)ind; void* tmp_val = (void*)val; host_dense_vector tmp_ptrv; host_dense_vector tmp_indv; host_dense_vector tmp_valv; if(!same_ptr_type) { tmp_ptrv.resize((M + 1)); tmp_ptr = tmp_ptrv; } else { tmp_ptr = ptr; } if(!same_ind_type) { tmp_indv.resize(nnz); tmp_ind = tmp_indv; } else { tmp_ind = ind; } if(!same_val_type) { tmp_valv.resize(nnz); tmp_val = tmp_valv; } else { tmp_val = val; } this->m_info_csx.in->read((char*)tmp_ptr, sizeof(int) * (M + 1)); this->m_info_csx.in->read((char*)tmp_ind, sizeof(int) * nnz); read_csr_values(this->m_info_csx.in[0], (int64_t)nnz, (T*)tmp_val); // this->m_info_csx.in->read((char*)tmp_val, sizeof(double) * nnz); this->m_info_csx.in->close(); delete this->m_info_csx.in; this->m_info_csx.in = nullptr; { const char* env = getenv("GTEST_LISTENER"); if(!env || strcmp(env, "NO_PASS_LINE_IN_LOG")) { std::cout << "Import done." << std::endl; } } // // Copy back. // if(!same_ptr_type) { rocsparse_importer_copy_mixed_arrays(M + 1, ptr, (int*)tmp_ptr); } if(!same_ind_type) { rocsparse_importer_copy_mixed_arrays(nnz, ind, (int*)tmp_ind); } if(!same_val_type) { rocsparse_importer_copy_mixed_arrays(nnz, val, (T*)tmp_val); } } return rocsparse_status_success; } #define INSTANTIATE_TIJ(T, I, J) \ template rocsparse_status rocsparse_importer_rocalution::import_sparse_csx(I*, J*, T*); \ template rocsparse_status rocsparse_importer_rocalution::import_sparse_gebsx(I*, J*, T*) #define INSTANTIATE_TI(T, I) \ template rocsparse_status rocsparse_importer_rocalution::import_sparse_coo( \ I* row_ind, I* col_ind, T* val) #define INSTANTIATE_I(I) \ template rocsparse_status rocsparse_importer_rocalution::import_sparse_coo( \ I* m, I* n, int64_t* nnz, rocsparse_index_base* base) #define INSTANTIATE_IJ(I, J) \ template rocsparse_status rocsparse_importer_rocalution::import_sparse_csx( \ rocsparse_direction*, J*, J*, I*, rocsparse_index_base*); \ template rocsparse_status 
rocsparse_importer_rocalution::import_sparse_gebsx( \ rocsparse_direction*, rocsparse_direction*, J*, J*, I*, J*, J*, rocsparse_index_base*) INSTANTIATE_I(int32_t); INSTANTIATE_I(int64_t); INSTANTIATE_IJ(int32_t, int32_t); INSTANTIATE_IJ(int64_t, int32_t); INSTANTIATE_IJ(int64_t, int64_t); INSTANTIATE_TIJ(int8_t, int32_t, int32_t); INSTANTIATE_TIJ(int8_t, int64_t, int32_t); INSTANTIATE_TIJ(int8_t, int64_t, int64_t); INSTANTIATE_TIJ(float, int32_t, int32_t); INSTANTIATE_TIJ(float, int64_t, int32_t); INSTANTIATE_TIJ(float, int64_t, int64_t); INSTANTIATE_TIJ(double, int32_t, int32_t); INSTANTIATE_TIJ(double, int64_t, int32_t); INSTANTIATE_TIJ(double, int64_t, int64_t); INSTANTIATE_TIJ(rocsparse_float_complex, int32_t, int32_t); INSTANTIATE_TIJ(rocsparse_float_complex, int64_t, int32_t); INSTANTIATE_TIJ(rocsparse_float_complex, int64_t, int64_t); INSTANTIATE_TIJ(rocsparse_double_complex, int32_t, int32_t); INSTANTIATE_TIJ(rocsparse_double_complex, int64_t, int32_t); INSTANTIATE_TIJ(rocsparse_double_complex, int64_t, int64_t); INSTANTIATE_TI(int8_t, int32_t); INSTANTIATE_TI(int8_t, int64_t); INSTANTIATE_TI(float, int32_t); INSTANTIATE_TI(float, int64_t); INSTANTIATE_TI(double, int32_t); INSTANTIATE_TI(double, int64_t); INSTANTIATE_TI(rocsparse_float_complex, int32_t); INSTANTIATE_TI(rocsparse_float_complex, int64_t); INSTANTIATE_TI(rocsparse_double_complex, int32_t); INSTANTIATE_TI(rocsparse_double_complex, int64_t); rocSPARSE-rocm-5.7.1/clients/common/rocsparse_importer_rocalution.hpp000066400000000000000000000063311447342677400260330ustar00rootroot00000000000000/*! \file */ /* ************************************************************************ * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
* * ************************************************************************ */ #pragma once #ifndef ROCSPARSE_IMPORTER_ROCALUTION_HPP #define ROCSPARSE_IMPORTER_ROCALUTION_HPP #include "rocsparse_importer.hpp" class rocsparse_importer_rocalution : public rocsparse_importer { protected: std::string m_filename; public: using IMPL = rocsparse_importer_rocalution; rocsparse_importer_rocalution(const std::string& filename_); public: template rocsparse_status import_sparse_gebsx(rocsparse_direction* dir, rocsparse_direction* dirb, J* mb, J* nb, I* nnzb, J* block_dim_row, J* block_dim_column, rocsparse_index_base* base); template rocsparse_status import_sparse_gebsx(I* ptr, J* ind, T* val); template rocsparse_status import_sparse_coo(I* m, I* n, int64_t* nnz, rocsparse_index_base* base); template rocsparse_status import_sparse_coo(I* row_ind, I* col_ind, T* val); template rocsparse_status import_sparse_csx(rocsparse_direction* dir, J* m, J* n, I* nnz, rocsparse_index_base* base); template rocsparse_status import_sparse_csx(I* ptr, J* ind, T* val); private: struct info_csx { size_t m{}; size_t nnz{}; std::ifstream* in{}; }; info_csx m_info_csx{}; public: }; #endif rocSPARSE-rocm-5.7.1/clients/common/rocsparse_importer_rocsparseio.cpp000066400000000000000000000664151447342677400262110ustar00rootroot00000000000000/*! \file */ /* ************************************************************************ * Copyright (C) 2021-2023 Advanced Micro Devices, Inc. All rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
* * ************************************************************************ */ #include "rocsparse_importer_rocsparseio.hpp" #ifdef ROCSPARSEIO #define ROCSPARSE_CHECK_ROCSPARSEIO(iostatus_) \ if(iostatus_ != rocsparseio_status_success) \ { \ return rocsparse_status_internal_error; \ } template inline rocsparseio_status rocsparseio2rocsparse_convert(const X& x, Y& y); template <> inline rocsparseio_status rocsparseio2rocsparse_convert(const rocsparseio_direction& x, rocsparse_direction& y) { switch(x) { case rocsparseio_direction_row: { y = rocsparse_direction_row; return rocsparseio_status_success; } case rocsparseio_direction_column: { y = rocsparse_direction_column; return rocsparseio_status_success; } } return rocsparseio_status_invalid_value; } template <> inline rocsparseio_status rocsparseio2rocsparse_convert(const rocsparseio_order& x, rocsparse_order& y) { switch(x) { case rocsparseio_order_row: { y = rocsparse_order_row; return rocsparseio_status_success; } case rocsparseio_order_column: { y = rocsparse_order_column; return rocsparseio_status_success; } } return rocsparseio_status_invalid_value; } template <> inline rocsparseio_status rocsparseio2rocsparse_convert(const rocsparseio_index_base& x, rocsparse_index_base& y) { switch(x) { case rocsparseio_index_base_zero: { y = rocsparse_index_base_zero; return rocsparseio_status_success; } case rocsparseio_index_base_one: { y = rocsparse_index_base_one; return rocsparseio_status_success; } } return rocsparseio_status_invalid_value; } template inline rocsparseio_type type_tconvert(); template <> inline rocsparseio_type type_tconvert() { return rocsparseio_type_int32; }; template <> inline rocsparseio_type type_tconvert() { return rocsparseio_type_int64; }; template <> inline rocsparseio_type type_tconvert() { return rocsparseio_type_float32; }; template <> inline rocsparseio_type type_tconvert() { return rocsparseio_type_float64; }; template <> inline rocsparseio_type type_tconvert() { return 
rocsparseio_type_complex32; }; template <> inline rocsparseio_type type_tconvert() { return rocsparseio_type_complex64; }; #endif rocsparse_importer_rocsparseio::~rocsparse_importer_rocsparseio() { #ifdef ROCSPARSEIO const char* env = getenv("GTEST_LISTENER"); if(!env || strcmp(env, "NO_PASS_LINE_IN_LOG")) { std::cout << "Import done." << std::endl; } auto istatus = rocsparseio_close(this->m_handle); if(istatus != rocsparseio_status_success) { } #endif } rocsparse_importer_rocsparseio::rocsparse_importer_rocsparseio(const std::string& filename_) : m_filename(filename_) { #ifdef ROCSPARSEIO const char* env = getenv("GTEST_LISTENER"); if(!env || strcmp(env, "NO_PASS_LINE_IN_LOG")) { std::cout << "Opening file '" << this->m_filename << "' ... " << std::endl; } rocsparseio_status istatus; istatus = rocsparseio_open(&this->m_handle, rocsparseio_rwmode_read, this->m_filename.c_str()); if(istatus != rocsparseio_status_success) { std::cerr << "cannot open file '" << this->m_filename << "'" << std::endl; throw rocsparse_status_internal_error; } #else throw rocsparse_status_not_implemented; #endif } template rocsparse_status rocsparse_importer_rocsparseio::import_sparse_coo(I* m, I* n, int64_t* nnz, rocsparse_index_base* base) { #ifdef ROCSPARSEIO size_t iM; size_t iN; size_t innz; rocsparseio_index_base ibase; rocsparseio_status istatus; istatus = rocsparseiox_read_metadata_sparse_coo(this->m_handle, &iM, &iN, &innz, &this->m_row_ind_type, &this->m_col_ind_type, &this->m_val_type, &ibase); ROCSPARSE_CHECK_ROCSPARSEIO(istatus); rocsparse_status status; status = rocsparse_type_conversion(iM, m[0]); if(status != rocsparse_status_success) return status; status = rocsparse_type_conversion(iN, n[0]); if(status != rocsparse_status_success) return status; status = rocsparse_type_conversion(innz, nnz[0]); if(status != rocsparse_status_success) return status; this->m_nnz = innz; ROCSPARSE_CHECK_ROCSPARSEIO(rocsparseio2rocsparse_convert(ibase, *base)); return rocsparse_status_success; 
#else return rocsparse_status_not_implemented; #endif } template rocsparse_status rocsparse_importer_rocsparseio::import_sparse_coo(I* row_ind, I* col_ind, T* val) { #ifdef ROCSPARSEIO rocsparseio_status istatus; const rocsparseio_type csr_ind_type = type_tconvert(), csr_val_type = type_tconvert(); const size_t NNZ = this->m_nnz; const bool same_ind_type = (this->m_ind_type == csr_ind_type); const bool same_val_type = (this->m_val_type == csr_val_type); const bool is_consistent = same_ind_type && same_val_type; if(is_consistent) { // // Import data. // istatus = rocsparseiox_read_sparse_coo(this->m_handle, row_ind, col_ind, val); ROCSPARSE_CHECK_ROCSPARSEIO(istatus); } else { void *tmp_row_ind = (void*)row_ind, *tmp_col_ind = (void*)col_ind, *tmp_val = (void*)val; host_dense_vector tmp_row_indv, tmp_col_indv, tmp_valv; size_t sizeof_ind_type, sizeof_val_type; istatus = rocsparseio_type_get_size(this->m_ind_type, &sizeof_ind_type); ROCSPARSE_CHECK_ROCSPARSEIO(istatus); istatus = rocsparseio_type_get_size(this->m_val_type, &sizeof_val_type); ROCSPARSE_CHECK_ROCSPARSEIO(istatus); if(!same_ind_type) { tmp_row_indv.resize(NNZ * sizeof_ind_type); tmp_row_ind = tmp_row_indv; tmp_col_indv.resize(NNZ * sizeof_ind_type); tmp_col_ind = tmp_col_indv; } if(!same_val_type) { tmp_valv.resize(NNZ * sizeof_val_type); tmp_val = tmp_valv; } istatus = rocsparseiox_read_sparse_coo(this->m_handle, tmp_row_ind, tmp_col_ind, tmp_val); ROCSPARSE_CHECK_ROCSPARSEIO(istatus); if(!same_ind_type) { switch(this->m_ind_type) { case rocsparseio_type_int32: { // // copy tmp_ind to ind. 
// rocsparse_importer_copy_mixed_arrays(NNZ, row_ind, (const int32_t*)tmp_row_ind); rocsparse_importer_copy_mixed_arrays(NNZ, col_ind, (const int32_t*)tmp_col_ind); break; } case rocsparseio_type_int64: { rocsparse_importer_copy_mixed_arrays(NNZ, row_ind, (const int64_t*)tmp_row_ind); rocsparse_importer_copy_mixed_arrays(NNZ, col_ind, (const int64_t*)tmp_col_ind); break; } case rocsparseio_type_float32: case rocsparseio_type_float64: case rocsparseio_type_complex32: case rocsparseio_type_complex64: { break; } } } if(!same_val_type) { switch(this->m_val_type) { case rocsparseio_type_int32: case rocsparseio_type_int64: { break; } case rocsparseio_type_float32: { // // copy val2 to val. // rocsparse_importer_copy_mixed_arrays(NNZ, val, (const float*)tmp_val); break; } case rocsparseio_type_float64: { rocsparse_importer_copy_mixed_arrays(NNZ, val, (const double*)tmp_val); break; } case rocsparseio_type_complex32: { rocsparse_importer_copy_mixed_arrays( NNZ, val, (const rocsparse_float_complex*)tmp_val); } case rocsparseio_type_complex64: { rocsparse_importer_copy_mixed_arrays( NNZ, val, (const rocsparse_double_complex*)tmp_val); break; } } } } return rocsparse_status_success; #else return rocsparse_status_not_implemented; #endif } template rocsparse_status rocsparse_importer_rocsparseio::import_sparse_gebsx(rocsparse_direction* dir, rocsparse_direction* dirb, J* mb, J* nb, I* nnzb, J* block_dim_row, J* block_dim_column, rocsparse_index_base* base) { #ifdef ROCSPARSEIO rocsparseio_status istatus; rocsparseio_direction idir, idirb; rocsparseio_index_base ibase; istatus = rocsparseio_open(&this->m_handle, rocsparseio_rwmode_read, this->m_filename.c_str()); ROCSPARSE_CHECK_ROCSPARSEIO(istatus); size_t iMb; size_t iNb; size_t innzb; size_t irow_block_dim, icol_block_dim; istatus = rocsparseiox_read_metadata_sparse_gebsx(this->m_handle, &idir, &idirb, &iMb, &iNb, &innzb, &irow_block_dim, &icol_block_dim, &this->m_ptr_type, &this->m_ind_type, &this->m_val_type, &ibase); 
ROCSPARSE_CHECK_ROCSPARSEIO(istatus); ROCSPARSE_CHECK_ROCSPARSEIO(rocsparseio2rocsparse_convert(ibase, *base)); ROCSPARSE_CHECK_ROCSPARSEIO(rocsparseio2rocsparse_convert(idir, *dir)); ROCSPARSE_CHECK_ROCSPARSEIO(rocsparseio2rocsparse_convert(idirb, *dirb)); rocsparse_status status; status = rocsparse_type_conversion(iMb, mb[0]); if(status != rocsparse_status_success) return status; status = rocsparse_type_conversion(iNb, nb[0]); if(status != rocsparse_status_success) return status; status = rocsparse_type_conversion(innzb, nnzb[0]); if(status != rocsparse_status_success) return status; status = rocsparse_type_conversion(irow_block_dim, block_dim_row[0]); if(status != rocsparse_status_success) return status; status = rocsparse_type_conversion(icol_block_dim, block_dim_column[0]); if(status != rocsparse_status_success) return status; this->m_mb = iMb; this->m_nnzb = innzb; this->m_row_block_dim = irow_block_dim; this->m_col_block_dim = icol_block_dim; return rocsparse_status_success; #else return rocsparse_status_not_implemented; #endif } template rocsparse_status rocsparse_importer_rocsparseio::import_sparse_gebsx(I* ptr, J* ind, T* val) { #ifdef ROCSPARSEIO rocsparseio_status istatus; const rocsparseio_type csr_ptr_type = type_tconvert(), csr_ind_type = type_tconvert(), csr_val_type = type_tconvert(); const size_t MB = this->m_mb; const size_t NNZB = this->m_nnzb; const size_t row_block_dim = this->m_row_block_dim; const size_t col_block_dim = this->m_col_block_dim; const bool same_ptr_type = (this->m_ptr_type == csr_ptr_type); const bool same_ind_type = (this->m_ind_type == csr_ind_type); const bool same_val_type = (this->m_val_type == csr_val_type); const bool is_consistent = same_ptr_type && same_ind_type && same_val_type; if(is_consistent) { // // Import data. 
// istatus = rocsparseiox_read_sparse_gebsx(this->m_handle, ptr, ind, val); ROCSPARSE_CHECK_ROCSPARSEIO(istatus); } else { void *tmp_ptr = (void*)ptr, *tmp_ind = (void*)ind, *tmp_val = (void*)val; host_dense_vector tmp_ptrv, tmp_indv, tmp_valv; size_t sizeof_ptr_type, sizeof_ind_type, sizeof_val_type; istatus = rocsparseio_type_get_size(this->m_ptr_type, &sizeof_ptr_type); ROCSPARSE_CHECK_ROCSPARSEIO(istatus); istatus = rocsparseio_type_get_size(this->m_ind_type, &sizeof_ind_type); ROCSPARSE_CHECK_ROCSPARSEIO(istatus); istatus = rocsparseio_type_get_size(this->m_val_type, &sizeof_val_type); ROCSPARSE_CHECK_ROCSPARSEIO(istatus); if(!same_ptr_type) { tmp_ptrv.resize((MB + 1) * sizeof_ptr_type); tmp_ptr = tmp_ptrv; } if(!same_ind_type) { tmp_indv.resize(NNZB * sizeof_ind_type); tmp_ind = tmp_indv; } if(!same_val_type) { tmp_valv.resize(NNZB * row_block_dim * col_block_dim * sizeof_val_type); tmp_val = tmp_valv; } istatus = rocsparseiox_read_sparse_csx(this->m_handle, tmp_ptr, tmp_ind, tmp_val); ROCSPARSE_CHECK_ROCSPARSEIO(istatus); if(!same_ptr_type) { switch(this->m_ptr_type) { case rocsparseio_type_int32: { rocsparse_importer_copy_mixed_arrays(MB + 1, ptr, (const int32_t*)tmp_ptr); break; } case rocsparseio_type_int64: { rocsparse_importer_copy_mixed_arrays(MB + 1, ptr, (const int64_t*)tmp_ptr); break; } case rocsparseio_type_float32: case rocsparseio_type_float64: case rocsparseio_type_complex32: case rocsparseio_type_complex64: { break; } } } if(!same_ind_type) { switch(this->m_ind_type) { case rocsparseio_type_int32: { rocsparse_importer_copy_mixed_arrays(NNZB, ind, (const int32_t*)tmp_ind); break; } case rocsparseio_type_int64: { rocsparse_importer_copy_mixed_arrays(NNZB, ind, (const int64_t*)tmp_ind); break; } case rocsparseio_type_float32: case rocsparseio_type_float64: case rocsparseio_type_complex32: case rocsparseio_type_complex64: { break; } } } if(!same_val_type) { switch(this->m_val_type) { case rocsparseio_type_int32: case rocsparseio_type_int64: { 
break; } case rocsparseio_type_float32: { rocsparse_importer_copy_mixed_arrays( NNZB * row_block_dim * col_block_dim, val, (const float*)tmp_val); break; } case rocsparseio_type_float64: { rocsparse_importer_copy_mixed_arrays( NNZB * row_block_dim * col_block_dim, val, (const double*)tmp_val); break; } case rocsparseio_type_complex32: { rocsparse_importer_copy_mixed_arrays(NNZB * row_block_dim * col_block_dim, val, (const rocsparse_float_complex*)tmp_val); } case rocsparseio_type_complex64: { rocsparse_importer_copy_mixed_arrays(NNZB * row_block_dim * col_block_dim, val, (const rocsparse_double_complex*)tmp_val); break; } } } } return rocsparse_status_success; #else return rocsparse_status_not_implemented; #endif } template rocsparse_status rocsparse_importer_rocsparseio::import_sparse_csx( rocsparse_direction* dir, J* m, J* n, I* nnz, rocsparse_index_base* base) { #ifdef ROCSPARSEIO rocsparseio_status istatus; rocsparseio_direction io_dir; rocsparseio_index_base ibase; istatus = rocsparseio_open(&this->m_handle, rocsparseio_rwmode_read, this->m_filename.c_str()); ROCSPARSE_CHECK_ROCSPARSEIO(istatus); size_t iM; size_t iN; size_t innz; istatus = rocsparseiox_read_metadata_sparse_csx(this->m_handle, &io_dir, &iM, &iN, &innz, &this->m_ptr_type, &this->m_ind_type, &this->m_val_type, &ibase); ROCSPARSE_CHECK_ROCSPARSEIO(istatus); ROCSPARSE_CHECK_ROCSPARSEIO(rocsparseio2rocsparse_convert(ibase, *base)); ROCSPARSE_CHECK_ROCSPARSEIO(rocsparseio2rocsparse_convert(io_dir, *dir)); rocsparse_status status; status = rocsparse_type_conversion(iM, m[0]); if(status != rocsparse_status_success) return status; status = rocsparse_type_conversion(iN, n[0]); if(status != rocsparse_status_success) return status; status = rocsparse_type_conversion(innz, nnz[0]); if(status != rocsparse_status_success) return status; this->m_m = iM; this->m_nnz = innz; return rocsparse_status_success; #else return rocsparse_status_not_implemented; #endif } template rocsparse_status 
rocsparse_importer_rocsparseio::import_sparse_csx(I* ptr, J* ind, T* val) { #ifdef ROCSPARSEIO rocsparseio_status istatus; const rocsparseio_type csr_ptr_type = type_tconvert(), csr_ind_type = type_tconvert(), csr_val_type = type_tconvert(); const size_t M = this->m_m; const size_t NNZ = this->m_nnz; const bool same_ptr_type = (this->m_ptr_type == csr_ptr_type); const bool same_ind_type = (this->m_ind_type == csr_ind_type); const bool same_val_type = (this->m_val_type == csr_val_type); const bool is_consistent = same_ptr_type && same_ind_type && same_val_type; if(is_consistent) { // // Import data. // istatus = rocsparseiox_read_sparse_csx(this->m_handle, ptr, ind, val); ROCSPARSE_CHECK_ROCSPARSEIO(istatus); } else { void *tmp_ptr = (void*)ptr, *tmp_ind = (void*)ind, *tmp_val = (void*)val; host_dense_vector tmp_ptrv, tmp_indv, tmp_valv; size_t sizeof_ptr_type, sizeof_ind_type, sizeof_val_type; istatus = rocsparseio_type_get_size(this->m_ptr_type, &sizeof_ptr_type); ROCSPARSE_CHECK_ROCSPARSEIO(istatus); istatus = rocsparseio_type_get_size(this->m_ind_type, &sizeof_ind_type); ROCSPARSE_CHECK_ROCSPARSEIO(istatus); istatus = rocsparseio_type_get_size(this->m_val_type, &sizeof_val_type); ROCSPARSE_CHECK_ROCSPARSEIO(istatus); if(!same_ptr_type) { tmp_ptrv.resize((M + 1) * sizeof_ptr_type); tmp_ptr = tmp_ptrv; } if(!same_ind_type) { tmp_indv.resize(NNZ * sizeof_ind_type); tmp_ind = tmp_indv; } if(!same_val_type) { tmp_valv.resize(NNZ * sizeof_val_type); tmp_val = tmp_valv; } istatus = rocsparseiox_read_sparse_csx(this->m_handle, tmp_ptr, tmp_ind, tmp_val); ROCSPARSE_CHECK_ROCSPARSEIO(istatus); if(!same_ptr_type) { switch(this->m_ptr_type) { case rocsparseio_type_int32: { rocsparse_importer_copy_mixed_arrays(M + 1, ptr, (const int32_t*)tmp_ptr); break; } case rocsparseio_type_int64: { rocsparse_importer_copy_mixed_arrays(M + 1, ptr, (const int64_t*)tmp_ptr); break; } case rocsparseio_type_float32: case rocsparseio_type_float64: case rocsparseio_type_complex32: case 
rocsparseio_type_complex64: { break; } } } if(!same_ind_type) { switch(this->m_ind_type) { case rocsparseio_type_int32: { rocsparse_importer_copy_mixed_arrays(NNZ, ind, (const int32_t*)tmp_ind); break; } case rocsparseio_type_int64: { rocsparse_importer_copy_mixed_arrays(NNZ, ind, (const int64_t*)tmp_ind); break; } case rocsparseio_type_float32: case rocsparseio_type_float64: case rocsparseio_type_complex32: case rocsparseio_type_complex64: { break; } } } if(!same_val_type) { switch(this->m_val_type) { case rocsparseio_type_int32: case rocsparseio_type_int64: { break; } case rocsparseio_type_float32: { rocsparse_importer_copy_mixed_arrays(NNZ, val, (const float*)tmp_val); break; } case rocsparseio_type_float64: { rocsparse_importer_copy_mixed_arrays(NNZ, val, (const double*)tmp_val); break; } case rocsparseio_type_complex32: { rocsparse_importer_copy_mixed_arrays( NNZ, val, (const rocsparse_float_complex*)tmp_val); } case rocsparseio_type_complex64: { rocsparse_importer_copy_mixed_arrays( NNZ, val, (const rocsparse_double_complex*)tmp_val); break; } } } } return rocsparse_status_success; #else return rocsparse_status_not_implemented; #endif } #define INSTANTIATE_TIJ(T, I, J) \ template rocsparse_status rocsparse_importer_rocsparseio::import_sparse_csx(I*, J*, T*); \ template rocsparse_status rocsparse_importer_rocsparseio::import_sparse_gebsx(I*, J*, T*) #define INSTANTIATE_TI(T, I) \ template rocsparse_status rocsparse_importer_rocsparseio::import_sparse_coo( \ I* row_ind, I* col_ind, T* val) #define INSTANTIATE_I(I) \ template rocsparse_status rocsparse_importer_rocsparseio::import_sparse_coo( \ I* m, I* n, int64_t* nnz, rocsparse_index_base* base) #define INSTANTIATE_IJ(I, J) \ template rocsparse_status rocsparse_importer_rocsparseio::import_sparse_csx( \ rocsparse_direction*, J*, J*, I*, rocsparse_index_base*); \ template rocsparse_status rocsparse_importer_rocsparseio::import_sparse_gebsx( \ rocsparse_direction*, rocsparse_direction*, J*, J*, I*, J*, J*, 
rocsparse_index_base*) INSTANTIATE_I(int32_t); INSTANTIATE_I(int64_t); INSTANTIATE_IJ(int32_t, int32_t); INSTANTIATE_IJ(int64_t, int32_t); INSTANTIATE_IJ(int64_t, int64_t); INSTANTIATE_TIJ(int8_t, int32_t, int32_t); INSTANTIATE_TIJ(int8_t, int64_t, int32_t); INSTANTIATE_TIJ(int8_t, int64_t, int64_t); INSTANTIATE_TIJ(float, int32_t, int32_t); INSTANTIATE_TIJ(float, int64_t, int32_t); INSTANTIATE_TIJ(float, int64_t, int64_t); INSTANTIATE_TIJ(double, int32_t, int32_t); INSTANTIATE_TIJ(double, int64_t, int32_t); INSTANTIATE_TIJ(double, int64_t, int64_t); INSTANTIATE_TIJ(rocsparse_float_complex, int32_t, int32_t); INSTANTIATE_TIJ(rocsparse_float_complex, int64_t, int32_t); INSTANTIATE_TIJ(rocsparse_float_complex, int64_t, int64_t); INSTANTIATE_TIJ(rocsparse_double_complex, int32_t, int32_t); INSTANTIATE_TIJ(rocsparse_double_complex, int64_t, int32_t); INSTANTIATE_TIJ(rocsparse_double_complex, int64_t, int64_t); INSTANTIATE_TI(int8_t, int32_t); INSTANTIATE_TI(int8_t, int64_t); INSTANTIATE_TI(float, int32_t); INSTANTIATE_TI(float, int64_t); INSTANTIATE_TI(double, int32_t); INSTANTIATE_TI(double, int64_t); INSTANTIATE_TI(rocsparse_float_complex, int32_t); INSTANTIATE_TI(rocsparse_float_complex, int64_t); INSTANTIATE_TI(rocsparse_double_complex, int32_t); INSTANTIATE_TI(rocsparse_double_complex, int64_t); rocSPARSE-rocm-5.7.1/clients/common/rocsparse_importer_rocsparseio.hpp000066400000000000000000000072621447342677400262110ustar00rootroot00000000000000/*! \file */ /* ************************************************************************ * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
* * ************************************************************************ */ #pragma once #ifndef ROCSPARSE_IMPORTER_ROCSPARSEIO_HPP #define ROCSPARSE_IMPORTER_ROCSPARSEIO_HPP #include "rocsparse_importer.hpp" #ifdef ROCSPARSEIO #include "rocsparseio.h" #endif class rocsparse_importer_rocsparseio : public rocsparse_importer { protected: std::string m_filename{}; #ifdef ROCSPARSEIO rocsparseio_type m_ptr_type{}; rocsparseio_type m_ind_type{}; rocsparseio_type m_val_type{}; rocsparseio_handle m_handle{}; #endif public: ~rocsparse_importer_rocsparseio(); using IMPL = rocsparse_importer_rocsparseio; rocsparse_importer_rocsparseio(const std::string& filename_); public: private: #ifdef ROCSPARSEIO rocsparseio_type m_row_ind_type; rocsparseio_type m_col_ind_type; #endif public: template rocsparse_status import_sparse_coo(I* m, I* n, int64_t* nnz, rocsparse_index_base* base); private: #ifdef ROCSPARSEIO size_t m_m; size_t m_nnz; #endif public: template rocsparse_status import_sparse_coo(I* row_ind, I* col_ind, T* val); public: template rocsparse_status import_sparse_gebsx(rocsparse_direction* dir, rocsparse_direction* dirb, J* mb, J* nb, I* nnzb, J* block_dim_row, J* block_dim_column, rocsparse_index_base* base); private: #ifdef ROCSPARSEIO size_t m_mb{}; size_t m_nnzb{}; size_t m_row_block_dim{}; size_t m_col_block_dim{}; #endif public: template rocsparse_status import_sparse_gebsx(I* ptr, J* ind, T* val); public: template rocsparse_status import_sparse_csx(rocsparse_direction* dir, J* m, J* n, I* nnz, rocsparse_index_base* base); template rocsparse_status import_sparse_csx(I* ptr, J* ind, T* val); }; #endif // HEADER rocSPARSE-rocm-5.7.1/clients/common/rocsparse_init.cpp000066400000000000000000002613441447342677400227000ustar00rootroot00000000000000/*! \file */ /* ************************************************************************ * Copyright (C) 2020-2023 Advanced Micro Devices, Inc. All rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
* * ************************************************************************ */ #include "rocsparse_init.hpp" #include "rocsparse_import.hpp" #include "rocsparse_importer_impls.hpp" #include "rocsparse_matrix.hpp" template void host_coo_to_csr( J M, I nnz, const J* coo_row_ind, std::vector& csr_row_ptr, rocsparse_index_base base) { // Resize and initialize csr_row_ptr with zeros csr_row_ptr.resize(M + 1, 0); for(size_t i = 0; i < nnz; ++i) { ++csr_row_ptr[coo_row_ind[i] + 1 - base]; } csr_row_ptr[0] = base; for(J i = 0; i < M; ++i) { csr_row_ptr[i + 1] += csr_row_ptr[i]; } } template void host_csr_to_coo(J M, I nnz, const std::vector& csr_row_ptr, std::vector& coo_row_ind, rocsparse_index_base base) { // Resize coo_row_ind coo_row_ind.resize(nnz); #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(J i = 0; i < M; ++i) { I row_begin = csr_row_ptr[i] - base; I row_end = csr_row_ptr[i + 1] - base; for(I j = row_begin; j < row_end; ++j) { coo_row_ind[j] = i + base; } } } template void host_csr_to_coo_aos(J M, I nnz, const std::vector& csr_row_ptr, const std::vector& csr_col_ind, std::vector& coo_ind, rocsparse_index_base base) { // Resize coo_ind coo_ind.resize(2 * nnz); #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(I i = 0; i < M; ++i) { I row_begin = csr_row_ptr[i] - base; I row_end = csr_row_ptr[i + 1] - base; for(I j = row_begin; j < row_end; ++j) { coo_ind[2 * j] = i + base; coo_ind[2 * j + 1] = static_cast(csr_col_ind[j]); } } } template void host_csr_to_ell(J M, const std::vector& csr_row_ptr, const std::vector& csr_col_ind, const std::vector& csr_val, std::vector& ell_col_ind, std::vector& ell_val, J& ell_width, rocsparse_index_base csr_base, rocsparse_index_base ell_base) { // Determine ELL width ell_width = 0; for(J i = 0; i < M; ++i) { J row_nnz = csr_row_ptr[i + 1] - csr_row_ptr[i]; ell_width = std::max(row_nnz, ell_width); } // Compute ELL non-zeros int64_t ell_nnz = (int64_t)ell_width * M; size_t 
required_memory = sizeof(J) * ell_nnz + sizeof(T) * ell_nnz; size_t available_memory = 0; hipDeviceGetLimit(&available_memory, hipLimit_t::hipLimitMallocHeapSize); if(required_memory > available_memory) { std::cerr << "Error: Insufficient memory available for conversion from CSR to ELL format. " "Required: " << required_memory << " available: " << available_memory << ". (File: " << __FILE__ << " Line: " << __LINE__ << ")" << std::endl; exit(1); } ell_col_ind.resize(ell_nnz); ell_val.resize(ell_nnz); #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(J i = 0; i < M; ++i) { J p = 0; I row_begin = csr_row_ptr[i] - csr_base; I row_end = csr_row_ptr[i + 1] - csr_base; J row_nnz = row_end - row_begin; // Fill ELL matrix with data for(I j = row_begin; j < row_end; ++j) { int64_t idx = (int64_t)p * M + i; ell_col_ind[idx] = csr_col_ind[j] - csr_base + ell_base; ell_val[idx] = csr_val[j]; ++p; } // Add padding to ELL structures for(J j = row_nnz; j < ell_width; ++j) { int64_t idx = (int64_t)p * M + i; ell_col_ind[idx] = -1; ell_val[idx] = static_cast(0); ++p; } } } /* ==================================================================================== */ /*! 
\brief matrix/vector initialization: */ // for vector x (M=1, N=lengthX, lda=incx); // for complex number, the real/imag part would be initialized with the same value // Initialize vector with random values template void rocsparse_init_exact( T* A, size_t M, size_t N, size_t lda, size_t stride, size_t batch_count, int a, int b) { constexpr size_t RANDOM_CACHE_SIZE = 1024; std::vector random(RANDOM_CACHE_SIZE); for(size_t i = 0; i < RANDOM_CACHE_SIZE; i++) { random[i] = random_generator_exact(a, b); } for(size_t i_batch = 0; i_batch < batch_count; i_batch++) { for(size_t j = 0; j < N; ++j) { for(size_t i = 0; i < M; ++i) { A[i + j * lda + i_batch * stride] = random[(i * 17 + j * lda * 59 + i_batch * stride * 83) % RANDOM_CACHE_SIZE]; } } } } template void rocsparse_init( T* A, size_t M, size_t N, size_t lda, size_t stride, size_t batch_count, T a, T b) { constexpr size_t RANDOM_CACHE_SIZE = 1024; std::vector random(RANDOM_CACHE_SIZE); for(size_t i = 0; i < RANDOM_CACHE_SIZE; i++) { random[i] = random_generator(a, b); } for(size_t i_batch = 0; i_batch < batch_count; i_batch++) { for(size_t j = 0; j < N; ++j) { for(size_t i = 0; i < M; ++i) { A[i + j * lda + i_batch * stride] = random[(i * 17 + j * lda * 59 + i_batch * stride * 83) % RANDOM_CACHE_SIZE]; } } } } template void rocsparse_init_exact(std::vector& A, size_t M, size_t N, size_t lda, size_t stride, size_t batch_count, int a, int b) { rocsparse_init_exact(A.data(), M, N, lda, stride, batch_count, a, b); } template void rocsparse_init( std::vector& A, size_t M, size_t N, size_t lda, size_t stride, size_t batch_count, T a, T b) { rocsparse_init(A.data(), M, N, lda, stride, batch_count, a, b); } // Initializes sparse index vector with nnz entries ranging from start to end template void rocsparse_init_index(std::vector& x, size_t nnz, size_t start, size_t end) { std::vector check(end - start, false); I num = 0; while(num < nnz) { I val = random_generator(start, end - 1); if(!check[val - start]) { x[num++] = val; 
check[val - start] = true; } } std::sort(x.begin(), x.end()); } // Initialize matrix so adjacent entries have alternating sign. // In gemm if either A or B are initialized with alernating // sign the reduction sum will be summing positive // and negative numbers, so it should not get too large. // This helps reduce floating point inaccuracies for 16bit // arithmetic where the exponent has only 5 bits, and the // mantissa 10 bits. template void rocsparse_init_alternating_sign( std::vector& A, size_t M, size_t N, size_t lda, size_t stride, size_t batch_count) { for(size_t i_batch = 0; i_batch < batch_count; i_batch++) for(size_t i = 0; i < M; ++i) for(size_t j = 0; j < N; ++j) { auto value = random_generator_exact(); A[i + j * lda + i_batch * stride] = (i ^ j) & 1 ? value : -value; } } /* ==================================================================================== */ /*! \brief Initialize an array with random data, with NaN where appropriate */ template void rocsparse_init_nan(T* A, size_t N) { for(size_t i = 0; i < N; ++i) A[i] = T(rocsparse_nan_rng()); } template void rocsparse_init_nan( std::vector& A, size_t M, size_t N, size_t lda, size_t stride, size_t batch_count) { for(size_t i_batch = 0; i_batch < batch_count; i_batch++) for(size_t i = 0; i < M; ++i) for(size_t j = 0; j < N; ++j) A[i + j * lda + i_batch * stride] = T(rocsparse_nan_rng()); } /* ==================================================================================== */ /*! 
\brief Generate a random sparse matrix in COO format */ template void rocsparse_init_coo_matrix(std::vector& row_ind, std::vector& col_ind, std::vector& val, I M, I N, int64_t nnz, rocsparse_index_base base, bool full_rank, bool to_int) { if(nnz == 0) { row_ind.resize(nnz); col_ind.resize(nnz); val.resize(nnz); return; } // If M > N, full rank is not possible if(full_rank && M > N) { std::cerr << "ERROR: M > N, cannot generate matrix with full rank" << std::endl; full_rank = false; } // If nnz < M, full rank is not possible if(full_rank && nnz < M) { std::cerr << "ERROR: nnz < M, cannot generate matrix with full rank" << std::endl; full_rank = false; } if(row_ind.size() != nnz) { row_ind.resize(nnz); } if(col_ind.size() != nnz) { col_ind.resize(nnz); } if(val.size() != nnz) { val.resize(nnz); } // Generate histogram of non-zero counts per row based on average non-zeros per row std::vector count(M, 0); I start = full_rank ? (I)std::min((int64_t)M, nnz) : 0; if(full_rank) { for(I k = 0; k < start; ++k) { count[k] = 1; } } int64_t remaining_nnz = nnz - start; I avg_nnz_per_row = remaining_nnz / M; for(I k = 0; k < M; k++) { I nnz_in_row = std::min(random_generator(0, 2 * avg_nnz_per_row), N); nnz_in_row = (I)std::min(remaining_nnz, (int64_t)nnz_in_row); count[k] += nnz_in_row; remaining_nnz -= nnz_in_row; } // Sprinkle any remaining non-zeros amoung the rows for(int64_t k = 0; k < remaining_nnz; ++k) { I i = random_generator(0, M - 1); int maxiter = 0; while(count[i] >= N && maxiter++ < 10) { i = random_generator(0, M - 1); } if(maxiter >= 10) { for(i = 0; i < M; ++i) { if(count[i] < N) { break; } } if(i == M) { std::cerr << "rocsparse_init_coo_matrix error" << std::endl; exit(1); } } count[i] += 1; } // Compute row index array from non-zeros per row count histogram int64_t offset = 0; I max_nnz_per_row = count[0]; for(I k = 0; k < M; k++) { I nnz_in_row = count[k]; if(max_nnz_per_row < nnz_in_row) max_nnz_per_row = nnz_in_row; for(I i = 0; i < nnz_in_row; i++) { 
row_ind[offset + i] = k; } offset += nnz_in_row; } // Generate column index array with values clustered around the diagonal I sec = std::min(2 * max_nnz_per_row, N); std::vector random(2 * sec + 1); int64_t at = 0; for(I i = 0; i < M; ++i) { int64_t begin = at; I nnz_in_row = count[i]; I bmax = std::min(i + sec, N - 1); I bmin = std::max(bmax - 2 * sec, ((I)0)); // Initial permutation of column indices for(I k = 0; k <= (bmax - bmin); ++k) { random[k] = k; } // shuffle permutation for(I k = 0; k < nnz_in_row; ++k) { std::swap(random[k], random[random_generator(0, bmax - bmin)]); } if(full_rank) { col_ind[at++] = i; for(I k = 1; k < nnz_in_row; ++k) { if(bmin + random[k] == i) { col_ind[at++] = bmin + random[bmax - bmin]; } else { col_ind[at++] = bmin + random[k]; } } } else { for(I k = 0; k < nnz_in_row; ++k) { col_ind[at++] = bmin + random[k]; } } if(nnz_in_row > 0) { std::sort(col_ind.data() + begin, col_ind.data() + begin + nnz_in_row); } } // Correct index base accordingly if(base == rocsparse_index_base_one) { for(int64_t i = 0; i < nnz; ++i) { ++row_ind[i]; ++col_ind[i]; } } constexpr size_t RANDOM_CACHE_SIZE = 1024; if(to_int) { std::vector random(RANDOM_CACHE_SIZE); for(size_t i = 0; i < RANDOM_CACHE_SIZE; i++) { random[i] = random_generator_exact(); } // Sample random values for(int64_t i = 0; i < nnz; ++i) { val[i] = random[i % RANDOM_CACHE_SIZE]; } } else { if(full_rank) { std::vector random_off_diag(RANDOM_CACHE_SIZE); std::vector random_diag1(RANDOM_CACHE_SIZE); std::vector random_diag2(RANDOM_CACHE_SIZE); for(size_t i = 0; i < RANDOM_CACHE_SIZE; i++) { random_off_diag[i] = random_generator(static_cast(-0.5), static_cast(0.5)); random_diag1[i] = random_generator(static_cast(4.0), static_cast(8.0)); random_diag2[i] = random_generator(static_cast(-1.0e-2), static_cast(1.0e-2)); } // Sample random off-diagonal values for(int64_t i = 0; i < nnz; ++i) { if(row_ind[i] == col_ind[i]) { // Sample diagonal values val[i] = random_diag1[i % RANDOM_CACHE_SIZE]; 
val[i] += val[i] * random_diag2[i % RANDOM_CACHE_SIZE]; } else { // Samples off-diagonal values val[i] = random_off_diag[i % RANDOM_CACHE_SIZE]; } } } else { std::vector random(RANDOM_CACHE_SIZE); for(size_t i = 0; i < RANDOM_CACHE_SIZE; i++) { random[i] = random_generator(static_cast(-1.0), static_cast(1.0)); } // Sample random values for(int64_t i = 0; i < nnz; ++i) { val[i] = random[i % RANDOM_CACHE_SIZE]; } } } } /* ==================================================================================== */ /*! \brief Generate 2D 9pt laplacian on unit square in CSR format */ template void rocsparse_init_csr_laplace2d(std::vector& row_ptr, std::vector& col_ind, std::vector& val, int32_t dim_x, int32_t dim_y, J& M, J& N, I& nnz, rocsparse_index_base base) { // Do nothing if(dim_x == 0 || dim_y == 0) { return; } M = dim_x * dim_y; N = dim_x * dim_y; // Approximate 9pt stencil I nnz_mat = 9 * M; row_ptr.resize(M + 1); col_ind.resize(nnz_mat); val.resize(nnz_mat); nnz = base; row_ptr[0] = base; // Fill local arrays #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(int32_t iy = 0; iy < dim_y; ++iy) { for(int32_t ix = 0; ix < dim_x; ++ix) { J row = iy * dim_x + ix; for(int32_t sy = -1; sy <= 1; ++sy) { if(iy + sy > -1 && iy + sy < dim_y) { for(int32_t sx = -1; sx <= 1; ++sx) { if(ix + sx > -1 && ix + sx < dim_x) { J col = row + sy * dim_x + sx; col_ind[nnz - base] = col + base; val[nnz - base] = (col == row) ? 8.0 : -1.0; ++nnz; } } } } row_ptr[row + 1] = nnz; } } // Adjust nnz by index base nnz -= base; // compress to actual nnz col_ind.resize(nnz); val.resize(nnz); } /* ==================================================================================== */ /*! 
\brief Generate 2D 9pt laplacian on unit square in COO format */ template void rocsparse_init_coo_laplace2d(std::vector& row_ind, std::vector& col_ind, std::vector& val, int32_t dim_x, int32_t dim_y, I& M, I& N, int64_t& nnz, rocsparse_index_base base) { // Always load using int64 as we dont know ahead of time how many nnz exist in matrix std::vector row_ptr; // Sample CSR matrix rocsparse_init_csr_laplace2d(row_ptr, col_ind, val, dim_x, dim_y, M, N, nnz, base); // Convert to COO host_csr_to_coo(M, nnz, row_ptr, row_ind, base); } /* ==================================================================================== */ /*! \brief Generate 2D 9pt laplacian on unit square in GEBSR format */ template void rocsparse_init_gebsr_laplace2d(std::vector& row_ptr, std::vector& col_ind, std::vector& val, int32_t dim_x, int32_t dim_y, J& Mb, J& Nb, I& nnzb, J row_block_dim, J col_block_dim, rocsparse_index_base base) { rocsparse_init_csr_laplace2d(row_ptr, col_ind, val, dim_x, dim_y, Mb, Nb, nnzb, base); const size_t nvalues = size_t(nnzb) * row_block_dim * col_block_dim; val.resize(nvalues); for(size_t i = 0; i < nvalues; ++i) { val[i] = random_generator(); } } /* ==================================================================================== */ /*! \brief Generate 2D 9pt laplacian on unit square in ELL format */ template void rocsparse_init_ell_laplace2d(std::vector& col_ind, std::vector& val, int32_t dim_x, int32_t dim_y, I& M, I& N, I& width, rocsparse_index_base base) { I csr_nnz; std::vector csr_row_ptr; std::vector csr_col_ind; std::vector csr_val; // Sample CSR matrix rocsparse_init_csr_laplace2d( csr_row_ptr, csr_col_ind, csr_val, dim_x, dim_y, M, N, csr_nnz, base); // Convert to ELL host_csr_to_ell(M, csr_row_ptr, csr_col_ind, csr_val, col_ind, val, width, base, base); } /* ==================================================================================== */ /*! 
\brief Generate 3D 27pt laplacian on unit square in CSR format */ template void rocsparse_init_csr_laplace3d(std::vector& row_ptr, std::vector& col_ind, std::vector& val, int32_t dim_x, int32_t dim_y, int32_t dim_z, J& M, J& N, I& nnz, rocsparse_index_base base) { // Do nothing if(dim_x == 0 || dim_y == 0 || dim_z == 0) { return; } M = dim_x * dim_y * dim_z; N = dim_x * dim_y * dim_z; // Approximate 27pt stencil I nnz_mat = 27 * M; row_ptr.resize(M + 1); col_ind.resize(nnz_mat); val.resize(nnz_mat); nnz = base; row_ptr[0] = base; // Fill local arrays #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(int32_t iz = 0; iz < dim_z; ++iz) { for(int32_t iy = 0; iy < dim_y; ++iy) { for(int32_t ix = 0; ix < dim_x; ++ix) { J row = iz * dim_x * dim_y + iy * dim_x + ix; for(int32_t sz = -1; sz <= 1; ++sz) { if(iz + sz > -1 && iz + sz < dim_z) { for(int32_t sy = -1; sy <= 1; ++sy) { if(iy + sy > -1 && iy + sy < dim_y) { for(int32_t sx = -1; sx <= 1; ++sx) { if(ix + sx > -1 && ix + sx < dim_x) { J col = row + sz * dim_x * dim_y + sy * dim_x + sx; col_ind[nnz - base] = col + base; val[nnz - base] = (col == row) ? 26.0 : -1.0; ++nnz; } } } } } } row_ptr[row + 1] = nnz; } } } // Adjust nnz by index base nnz -= base; // compress to actual nnz col_ind.resize(nnz); val.resize(nnz); } /* ==================================================================================== */ /*! 
\brief Generate 3D 27pt laplacian on unit square in COO format */ template void rocsparse_init_coo_laplace3d(std::vector& row_ind, std::vector& col_ind, std::vector& val, int32_t dim_x, int32_t dim_y, int32_t dim_z, I& M, I& N, int64_t& nnz, rocsparse_index_base base) { // Always load using int64 as we dont know ahead of time how many nnz exist in matrix std::vector row_ptr; // Sample CSR matrix rocsparse_init_csr_laplace3d(row_ptr, col_ind, val, dim_x, dim_y, dim_z, M, N, nnz, base); // Convert to COO host_csr_to_coo(M, nnz, row_ptr, row_ind, base); } /* ==================================================================================== */ /*! \brief Generate 3D 27pt laplacian on unit square in GEBSR format */ template void rocsparse_init_gebsr_laplace3d(std::vector& row_ptr, std::vector& col_ind, std::vector& val, int32_t dim_x, int32_t dim_y, int32_t dim_z, J& Mb, J& Nb, I& nnzb, J row_block_dim, J col_block_dim, rocsparse_index_base base) { rocsparse_init_csr_laplace3d(row_ptr, col_ind, val, dim_x, dim_y, dim_z, Mb, Nb, nnzb, base); const size_t nvalues = size_t(nnzb) * row_block_dim * col_block_dim; val.resize(nvalues); for(size_t i = 0; i < nvalues; ++i) { val[i] = random_generator(); } } /* ==================================================================================== */ /*! 
\brief Read matrix from mtx file in CSR format */ template void rocsparse_init_csr_mtx(const char* filename, std::vector& csr_row_ptr, std::vector& csr_col_ind, std::vector& csr_val, J& M, J& N, I& nnz, rocsparse_index_base base) { I coo_M, coo_N; int64_t coo_nnz; std::vector coo_row_ind; std::vector coo_col_ind; // Read COO matrix rocsparse_init_coo_mtx( filename, coo_row_ind, coo_col_ind, csr_val, coo_M, coo_N, coo_nnz, base); // Convert to CSR M = (J)coo_M; N = (J)coo_N; nnz = (I)coo_nnz; csr_row_ptr.resize(M + 1); csr_col_ind.resize(nnz); host_coo_to_csr(coo_M, nnz, coo_row_ind.data(), csr_row_ptr, base); for(I i = 0; i < nnz; ++i) { csr_col_ind[i] = (J)coo_col_ind[i]; } } /* ============================================================================================ */ /*! \brief Read matrix from mtx file in COO format */ template void rocsparse_init_coo_mtx(const char* filename, std::vector& coo_row_ind, std::vector& coo_col_ind, std::vector& coo_val, I& M, I& N, int64_t& nnz, rocsparse_index_base base) { rocsparse_importer_matrixmarket importer(filename); rocsparse_status status = rocsparse_import_sparse_coo(importer, coo_row_ind, coo_col_ind, coo_val, M, N, nnz, base); CHECK_ROCSPARSE_THROW_ERROR(status); } /* ============================================================================================ */ /*! 
\brief Read matrix from mtx file in GEBSR format */ template void rocsparse_init_gebsr_mtx(const char* filename, std::vector& bsr_row_ptr, std::vector& bsr_col_ind, std::vector& bsr_val, J& Mb, J& Nb, I& nnzb, J row_block_dim, J col_block_dim, rocsparse_index_base base) { //this->init_csr(bsr_row_ptr, bsr_col_ind, bsr_val, Mb, Nb, nnzb, base); rocsparse_init_csr_mtx(filename, bsr_row_ptr, bsr_col_ind, bsr_val, Mb, Nb, nnzb, base); const size_t nvalues = size_t(nnzb) * row_block_dim * col_block_dim; bsr_val.resize(nvalues); for(size_t i = 0; i < nvalues; ++i) { bsr_val[i] = random_generator(); } } template void rocsparse_init_csr_rocalution(const char* filename, std::vector& row_ptr, std::vector& col_ind, std::vector& val, J& M, J& N, I& nnz, rocsparse_index_base base) { rocsparse_importer_rocalution importer(filename); rocsparse_status status = rocsparse_import_sparse_csr(importer, row_ptr, col_ind, val, M, N, nnz, base); CHECK_ROCSPARSE_THROW_ERROR(status); } /* ==================================================================================== */ /*! \brief Read matrix from binary file in rocALUTION format */ template void rocsparse_init_coo_rocalution(const char* filename, std::vector& row_ind, std::vector& col_ind, std::vector& val, I& M, I& N, int64_t& nnz, rocsparse_index_base base) { I csr_nnz = 0; std::vector row_ptr(M + 1); // Sample CSR matrix rocsparse_init_csr_rocalution(filename, row_ptr, col_ind, val, M, N, csr_nnz, base); host_csr_to_coo(M, csr_nnz, row_ptr, row_ind, base); nnz = csr_nnz; } /* ==================================================================================== */ /*! \brief Read matrix from binary file in rocALUTION format */ template void rocsparse_init_gebsr_rocalution(const char* filename, std::vector& row_ptr, std::vector& col_ind, std::vector& val, J& Mb, J& Nb, I& nnzb, J row_block_dim, J col_block_dim, rocsparse_index_base base) { // Temporarily the file contains a CSR matrix. 
rocsparse_init_csr_rocalution(filename, row_ptr, col_ind, val, Mb, Nb, nnzb, base); // Then temporarily skip the values. const size_t nvalues = size_t(nnzb) * row_block_dim * col_block_dim; val.resize(nvalues); for(size_t i = 0; i < nvalues; ++i) { val[i] = random_generator(); } } /* ==================================================================================== */ /*! \brief Read matrix from binary file in rocSPARSEIO format */ template void rocsparse_init_csr_rocsparseio(const char* filename, std::vector& row_ptr, std::vector& col_ind, std::vector& val, J& M, J& N, I& nnz, rocsparse_index_base base) { rocsparse_importer_rocsparseio importer(filename); rocsparse_status status = rocsparse_import_sparse_csr(importer, row_ptr, col_ind, val, M, N, nnz, base); CHECK_ROCSPARSE_THROW_ERROR(status); } /* ==================================================================================== */ /*! \brief Read matrix from binary file in rocSPARSEIO format */ template void rocsparse_init_coo_rocsparseio(const char* filename, std::vector& row_ind, std::vector& col_ind, std::vector& val, I& M, I& N, int64_t& nnz, rocsparse_index_base base) { rocsparse_importer_rocsparseio importer(filename); rocsparse_status status = rocsparse_import_sparse_coo(importer, row_ind, col_ind, val, M, N, nnz, base); CHECK_ROCSPARSE_THROW_ERROR(status); } /* ==================================================================================== */ /*! 
\brief Read matrix from binary file in rocSPARSEIO format */ template void rocsparse_init_gebsr_rocsparseio(const char* filename, std::vector& row_ptr, std::vector& col_ind, std::vector& val, rocsparse_direction dir, J& Mb, J& Nb, I& nnzb, J row_block_dim, J col_block_dim, rocsparse_index_base base) { rocsparse_direction import_dir = {}; rocsparse_importer_rocsparseio importer(filename); rocsparse_status status = rocsparse_import_sparse_gebsr(importer, row_ptr, col_ind, val, import_dir, Mb, Nb, nnzb, row_block_dim, col_block_dim, base); CHECK_ROCSPARSE_THROW_ERROR(status); if(import_dir != dir) { std::cerr << "TODO, reorder ?" << std::endl; exit(1); } } /* ==================================================================================== */ /*! \brief Generate a random sparse matrix in CSR format */ template void rocsparse_init_csr_random(std::vector& csr_row_ptr, std::vector& csr_col_ind, std::vector& csr_val, J M, J N, I& nnz, rocsparse_index_base base, rocsparse_matrix_init_kind init_kind, bool full_rank, bool to_int) { switch(init_kind) { case rocsparse_matrix_init_kind_tunedavg: { rocsparse_int alpha = static_cast(0); if(N >= 16384) { alpha = static_cast(8); } else if(N >= 8192) { alpha = static_cast(8); } else if(N >= 4096) { alpha = static_cast(16); } else if(N >= 1024) { alpha = static_cast(32); } else { alpha = static_cast(64); } nnz = static_cast(M) * alpha; nnz = std::min(nnz, static_cast(M) * static_cast(N)); // Sample random matrix std::vector row_ind(nnz); // Sample COO matrix rocsparse_init_coo_matrix( row_ind, csr_col_ind, csr_val, M, N, nnz, base, full_rank, to_int); // Convert to CSR host_coo_to_csr(M, nnz, row_ind.data(), csr_row_ptr, base); break; } case rocsparse_matrix_init_kind_default: { if(M < 32 && N < 32) { nnz = (static_cast(M) * static_cast(N)) / 4; if(full_rank) { nnz = std::max(nnz, static_cast(M)); } nnz = std::max(nnz, static_cast(M)); nnz = std::min(nnz, static_cast(M) * static_cast(N)); } else { nnz = static_cast(M) * ((M > 1000 
|| N > 1000) ? 2.0 / std::max(M, N) : 0.02) * static_cast(N); } // Sample random matrix std::vector row_ind(nnz); // Sample COO matrix rocsparse_init_coo_matrix( row_ind, csr_col_ind, csr_val, M, N, nnz, base, full_rank, to_int); // Convert to CSR host_coo_to_csr(M, nnz, row_ind.data(), csr_row_ptr, base); break; } } } /* ==================================================================================== */ /*! \brief Generate a random sparse matrix in COO format */ template void rocsparse_init_coo_random(std::vector& row_ind, std::vector& col_ind, std::vector& val, I M, I N, int64_t& nnz, rocsparse_index_base base, rocsparse_matrix_init_kind init_kind, bool full_rank, bool to_int) { switch(init_kind) { case rocsparse_matrix_init_kind_tunedavg: { rocsparse_int alpha = static_cast(0); if(N >= 16384) { alpha = static_cast(8); } else if(N >= 8192) { alpha = static_cast(16); } else if(N >= 4096) { alpha = static_cast(32); } else if(N >= 1024) { alpha = static_cast(64); } else { alpha = static_cast(128); } nnz = static_cast(M) * alpha; nnz = std::min(nnz, static_cast(M) * static_cast(N)); // Sample random matrix rocsparse_init_coo_matrix(row_ind, col_ind, val, M, N, nnz, base, full_rank, to_int); break; } case rocsparse_matrix_init_kind_default: { // Compute non-zero entries of the matrix if(M < 32 && N < 32) { nnz = (static_cast(M) * static_cast(N)) / 4; } else { nnz = static_cast(M) * ((M > 1000 || N > 1000) ? 2.0 / std::max(M, N) : 0.02) * static_cast(N); } // Sample random matrix rocsparse_init_coo_matrix(row_ind, col_ind, val, M, N, nnz, base, full_rank, to_int); break; } } } /* ==================================================================================== */ /*! 
\brief Generate a random sparse matrix in GEBSR format */ template void rocsparse_init_gebsr_random(std::vector& row_ptr, std::vector& col_ind, std::vector& val, J Mb, J Nb, I& nnzb, J row_block_dim, J col_block_dim, rocsparse_index_base base, rocsparse_matrix_init_kind init_kind, bool full_rank, bool to_int) { rocsparse_init_csr_random( row_ptr, col_ind, val, Mb, Nb, nnzb, base, init_kind, full_rank, to_int); const size_t nvalues = size_t(nnzb) * row_block_dim * col_block_dim; val.resize(nvalues); if(to_int) { for(size_t i = 0; i < nvalues; ++i) { val[i] = random_generator_exact(); } } else { for(size_t i = 0; i < nvalues; ++i) { val[i] = random_generator(); } } } /* ==================================================================================== */ /*! \brief Generate a tridiagonal sparse matrix in COO format */ template void rocsparse_init_coo_tridiagonal(std::vector& row_ind, std::vector& col_ind, std::vector& val, I M, I N, int64_t& nnz, rocsparse_index_base base, I l, I u) { if(l >= 0 || -l >= M) { std::cerr << "ERROR: l >= 0 || -l >= M" << std::endl; return; } if(u <= 0 || u >= N) { std::cerr << "ERROR: u <= 0 || u >= N" << std::endl; return; } constexpr size_t RANDOM_CACHE_SIZE = 1024; std::vector random(RANDOM_CACHE_SIZE); std::vector random_diag(RANDOM_CACHE_SIZE); for(size_t i = 0; i < RANDOM_CACHE_SIZE; i++) { random[i] = random_generator(static_cast(-1.0), static_cast(1.0)); random_diag[i] = random_generator(static_cast(2.0), static_cast(4.0)); } int64_t l_length = std::min((M + l), N); int64_t d_length = std::min(M, N); int64_t u_length = std::min((N - u), M); nnz = l_length + d_length + u_length; row_ind.resize(nnz); col_ind.resize(nnz); val.resize(nnz); int64_t index = 0; for(I i = 0; i < M; i++) { I l_col = i + l; I d_col = i; I u_col = i + u; if(l_col >= 0 && l_col < N) { row_ind[index] = i + base; col_ind[index] = l_col + base; val[index] = random[index % RANDOM_CACHE_SIZE]; index++; } if(d_col >= 0 && d_col < N) { row_ind[index] = i + base; 
col_ind[index] = d_col + base; val[index] = random_diag[index % RANDOM_CACHE_SIZE]; index++; } if(u_col >= 0 && u_col < N) { row_ind[index] = i + base; col_ind[index] = u_col + base; val[index] = random[index % RANDOM_CACHE_SIZE]; index++; } } } /* ==================================================================================== */ /*! \brief Generate a tridiagonal sparse matrix in CSR format */ template void rocsparse_init_csr_tridiagonal(std::vector& row_ptr, std::vector& col_ind, std::vector& val, J M, J N, I& nnz, rocsparse_index_base base, J l, J u) { int64_t coo_nnz; std::vector row_ind; // Sample COO matrix rocsparse_init_coo_tridiagonal(row_ind, col_ind, val, M, N, coo_nnz, base, l, u); if(std::is_same() && coo_nnz > std::numeric_limits::max()) { std::cerr << "Error: Attempting to create CSR tridiagonal matrix with more than " << std::numeric_limits::max() << " non-zeros while using int32_t row indexing." << std::endl; exit(1); } nnz = (I)coo_nnz; // Convert to CSR host_coo_to_csr(M, nnz, row_ind.data(), row_ptr, base); } /* ==================================================================================== */ /*! \brief Generate a tridiagonal sparse matrix in GEBSR format */ template void rocsparse_init_gebsr_tridiagonal(std::vector& row_ptr, std::vector& col_ind, std::vector& val, J Mb, J Nb, I& nnzb, J row_block_dim, J col_block_dim, rocsparse_index_base base, J l, J u) { rocsparse_init_csr_tridiagonal(row_ptr, col_ind, val, Mb, Nb, nnzb, base, l, u); const size_t nvalues = size_t(nnzb) * row_block_dim * col_block_dim; val.resize(nvalues); for(size_t i = 0; i < nvalues; ++i) { val[i] = random_generator(); } } /* ==================================================================================== */ /*! 
\brief Generate a pentadiagonal sparse matrix in COO format */ template void rocsparse_init_coo_pentadiagonal(std::vector& row_ind, std::vector& col_ind, std::vector& val, I M, I N, int64_t& nnz, rocsparse_index_base base, I ll, I l, I u, I uu) { if(ll >= 0 || l >= 0 || ll >= l || -l >= M || -ll >= M) { std::cerr << "ERROR: ll >= 0 || l >= 0 || ll >= l || -l >= M || -ll >= M" << std::endl; return; } if(u <= 0 || uu <= 0 || uu <= u || u >= N || uu >= N) { std::cerr << "ERROR: u <= 0 || uu <= 0 || uu <= u || u >= N || uu >= N" << std::endl; return; } constexpr size_t RANDOM_CACHE_SIZE = 1024; std::vector random(RANDOM_CACHE_SIZE); std::vector random_diag(RANDOM_CACHE_SIZE); for(size_t i = 0; i < RANDOM_CACHE_SIZE; i++) { random[i] = random_generator(static_cast(-1.0), static_cast(1.0)); random_diag[i] = random_generator(static_cast(4.0), static_cast(6.0)); } int64_t l_length = std::min((M + l), N); int64_t ll_length = std::min((M + ll), N); int64_t d_length = std::min(M, N); int64_t u_length = std::min((N - u), M); int64_t uu_length = std::min((N - uu), M); nnz = ll_length + l_length + d_length + u_length + uu_length; row_ind.resize(nnz); col_ind.resize(nnz); val.resize(nnz); int64_t index = 0; for(I i = 0; i < M; i++) { I ll_col = i + ll; I l_col = i + l; I d_col = i; I u_col = i + u; I uu_col = i + uu; if(ll_col >= 0 && ll_col < N) { row_ind[index] = i + base; col_ind[index] = ll_col + base; val[index] = random[index % RANDOM_CACHE_SIZE]; index++; } if(l_col >= 0 && l_col < N) { row_ind[index] = i + base; col_ind[index] = l_col + base; val[index] = random[index % RANDOM_CACHE_SIZE]; index++; } if(d_col >= 0 && d_col < N) { row_ind[index] = i + base; col_ind[index] = d_col + base; val[index] = random_diag[index % RANDOM_CACHE_SIZE]; index++; } if(u_col >= 0 && u_col < N) { row_ind[index] = i + base; col_ind[index] = u_col + base; val[index] = random[index % RANDOM_CACHE_SIZE]; index++; } if(uu_col >= 0 && uu_col < N) { row_ind[index] = i + base; col_ind[index] = 
uu_col + base; val[index] = random[index % RANDOM_CACHE_SIZE]; index++; } } } /* ==================================================================================== */ /*! \brief Generate a pentadiagonal sparse matrix in CSR format */ template void rocsparse_init_csr_pentadiagonal(std::vector& row_ptr, std::vector& col_ind, std::vector& val, J M, J N, I& nnz, rocsparse_index_base base, J ll, J l, J u, J uu) { int64_t coo_nnz; std::vector row_ind; // Sample COO matrix rocsparse_init_coo_pentadiagonal(row_ind, col_ind, val, M, N, coo_nnz, base, ll, l, u, uu); if(std::is_same() && coo_nnz > std::numeric_limits::max()) { std::cerr << "Error: Attempting to create CSR pentadiagonal matrix with more than " << std::numeric_limits::max() << " non-zeros while using int32_t row indexing." << std::endl; exit(1); } nnz = (I)coo_nnz; // Convert to CSR host_coo_to_csr(M, nnz, row_ind.data(), row_ptr, base); } /* ==================================================================================== */ /*! 
\brief Generate a pentadiagonal sparse matrix in GEBSR format */ template void rocsparse_init_gebsr_pentadiagonal(std::vector& row_ptr, std::vector& col_ind, std::vector& val, J Mb, J Nb, I& nnzb, J row_block_dim, J col_block_dim, rocsparse_index_base base, J ll, J l, J u, J uu) { rocsparse_init_csr_pentadiagonal(row_ptr, col_ind, val, Mb, Nb, nnzb, base, ll, l, u, uu); const size_t nvalues = size_t(nnzb) * row_block_dim * col_block_dim; val.resize(nvalues); for(size_t i = 0; i < nvalues; ++i) { val[i] = random_generator(); } } #define INSTANTIATEI(TYPE) \ template void rocsparse_init_index( \ std::vector & x, size_t nnz, size_t start, size_t end); #define INSTANTIATE(TYPE) \ template void rocsparse_init(TYPE * A, \ size_t M, \ size_t N, \ size_t lda, \ size_t stride, \ size_t batch_count = 1, \ TYPE a = static_cast(0), \ TYPE b = static_cast(1)); \ template void rocsparse_init_exact(TYPE * A, \ size_t M, \ size_t N, \ size_t lda, \ size_t stride, \ size_t batch_count, \ int a = 1, \ int b = 10); \ template void rocsparse_init(std::vector & A, \ size_t M, \ size_t N, \ size_t lda, \ size_t stride, \ size_t batch_count = 1, \ TYPE a = static_cast(0), \ TYPE b = static_cast(1)); \ template void rocsparse_init_exact(std::vector & A, \ size_t M, \ size_t N, \ size_t lda, \ size_t stride, \ size_t batch_count, \ int a = 1, \ int b = 10); \ template void rocsparse_init_alternating_sign( \ std::vector & A, size_t M, size_t N, size_t lda, size_t stride, size_t batch_count); \ template void rocsparse_init_nan(TYPE * A, size_t N); \ template void rocsparse_init_nan(std::vector & A, \ size_t M, \ size_t N, \ size_t lda, \ size_t stride = 0, \ size_t batch_count); #define INSTANTIATE1(ITYPE, JTYPE) \ template void host_csr_to_coo(JTYPE M, \ ITYPE nnz, \ const std::vector& csr_row_ptr, \ std::vector& coo_row_ind, \ rocsparse_index_base base); \ template void host_coo_to_csr(JTYPE M, \ ITYPE NNZ, \ const JTYPE* coo_row_ind, \ std::vector& csr_row_ptr, \ rocsparse_index_base 
base); \ template void host_csr_to_coo_aos(JTYPE M, \ ITYPE nnz, \ const std::vector& csr_row_ptr, \ const std::vector& csr_col_ind, \ std::vector& coo_ind, \ rocsparse_index_base base); #define INSTANTIATE2(ITYPE, TTYPE) \ template void rocsparse_init_coo_tridiagonal(std::vector & row_ind, \ std::vector & col_ind, \ std::vector & val, \ ITYPE M, \ ITYPE N, \ int64_t & nnz, \ rocsparse_index_base base, \ ITYPE l, \ ITYPE u); \ template void rocsparse_init_coo_pentadiagonal(std::vector & row_ind, \ std::vector & col_ind, \ std::vector & val, \ ITYPE M, \ ITYPE N, \ int64_t & nnz, \ rocsparse_index_base base, \ ITYPE ll, \ ITYPE l, \ ITYPE u, \ ITYPE uu); \ template void rocsparse_init_coo_laplace2d(std::vector & row_ind, \ std::vector & col_ind, \ std::vector & val, \ int32_t dim_x, \ int32_t dim_y, \ ITYPE & M, \ ITYPE & N, \ int64_t & nnz, \ rocsparse_index_base base); \ template void rocsparse_init_ell_laplace2d(std::vector & col_ind, \ std::vector & val, \ int32_t dim_x, \ int32_t dim_y, \ ITYPE & M, \ ITYPE & N, \ ITYPE & width, \ rocsparse_index_base base); \ template void rocsparse_init_coo_matrix(std::vector & row_ind, \ std::vector & col_ind, \ std::vector & val, \ ITYPE M, \ ITYPE N, \ int64_t nnz, \ rocsparse_index_base base, \ bool full_rank, \ bool to_int); \ template void rocsparse_init_coo_laplace3d(std::vector & row_ind, \ std::vector & col_ind, \ std::vector & val, \ int32_t dim_x, \ int32_t dim_y, \ int32_t dim_z, \ ITYPE & M, \ ITYPE & N, \ int64_t & nnz, \ rocsparse_index_base base); \ template void rocsparse_init_coo_mtx(const char* filename, \ std::vector& coo_row_ind, \ std::vector& coo_col_ind, \ std::vector& coo_val, \ ITYPE& M, \ ITYPE& N, \ int64_t& nnz, \ rocsparse_index_base base); \ template void rocsparse_init_coo_rocalution(const char* filename, \ std::vector& row_ind, \ std::vector& col_ind, \ std::vector& val, \ ITYPE& M, \ ITYPE& N, \ int64_t& nnz, \ rocsparse_index_base base); \ template void rocsparse_init_coo_rocsparseio(const 
char* filename, \ std::vector& row_ind, \ std::vector& col_ind, \ std::vector& val, \ ITYPE& M, \ ITYPE& N, \ int64_t& nnz, \ rocsparse_index_base base); \ template void rocsparse_init_coo_random(std::vector & row_ind, \ std::vector & col_ind, \ std::vector & val, \ ITYPE M, \ ITYPE N, \ int64_t & nnz, \ rocsparse_index_base base, \ rocsparse_matrix_init_kind init_kind, \ bool full_rank, \ bool to_int); #define INSTANTIATE3(ITYPE, JTYPE, TTYPE) \ template void rocsparse_init_csr_tridiagonal( \ std::vector & row_ptr, \ std::vector & col_ind, \ std::vector & val, \ JTYPE M, \ JTYPE N, \ ITYPE & nnz, \ rocsparse_index_base base, \ JTYPE l, \ JTYPE u); \ template void rocsparse_init_csr_pentadiagonal( \ std::vector & row_ptr, \ std::vector & col_ind, \ std::vector & val, \ JTYPE M, \ JTYPE N, \ ITYPE & nnz, \ rocsparse_index_base base, \ JTYPE ll, \ JTYPE l, \ JTYPE u, \ JTYPE uu); \ template void rocsparse_init_csr_laplace2d(std::vector & row_ptr, \ std::vector & col_ind, \ std::vector & val, \ int32_t dim_x, \ int32_t dim_y, \ JTYPE & M, \ JTYPE & N, \ ITYPE & nnz, \ rocsparse_index_base base); \ template void rocsparse_init_csr_laplace3d(std::vector & row_ptr, \ std::vector & col_ind, \ std::vector & val, \ int32_t dim_x, \ int32_t dim_y, \ int32_t dim_z, \ JTYPE & M, \ JTYPE & N, \ ITYPE & nnz, \ rocsparse_index_base base); \ template void rocsparse_init_csr_mtx(const char* filename, \ std::vector& csr_row_ptr, \ std::vector& csr_col_ind, \ std::vector& csr_val, \ JTYPE& M, \ JTYPE& N, \ ITYPE& nnz, \ rocsparse_index_base base); \ template void rocsparse_init_csr_rocalution(const char* filename, \ std::vector& row_ptr, \ std::vector& col_ind, \ std::vector& val, \ JTYPE& M, \ JTYPE& N, \ ITYPE& nnz, \ rocsparse_index_base base); \ template void rocsparse_init_csr_rocsparseio(const char* filename, \ std::vector& row_ptr, \ std::vector& col_ind, \ std::vector& val, \ JTYPE& M, \ JTYPE& N, \ ITYPE& nnz, \ rocsparse_index_base base); \ template void 
rocsparse_init_csr_random( \ std::vector & row_ptr, \ std::vector & col_ind, \ std::vector & val, \ JTYPE M, \ JTYPE N, \ ITYPE & nnz, \ rocsparse_index_base base, \ rocsparse_matrix_init_kind init_kind, \ bool full_rank, \ bool to_int); \ template void rocsparse_init_gebsr_tridiagonal( \ std::vector & row_ptr, \ std::vector & col_ind, \ std::vector & val, \ JTYPE Mb, \ JTYPE Nb, \ ITYPE & nnzb, \ JTYPE row_block_dim, \ JTYPE col_block_dim, \ rocsparse_index_base base, \ JTYPE l, \ JTYPE u); \ template void rocsparse_init_gebsr_pentadiagonal( \ std::vector & row_ptr, \ std::vector & col_ind, \ std::vector & val, \ JTYPE Mb, \ JTYPE Nb, \ ITYPE & nnzb, \ JTYPE row_block_dim, \ JTYPE col_block_dim, \ rocsparse_index_base base, \ JTYPE ll, \ JTYPE l, \ JTYPE u, \ JTYPE uu); \ template void rocsparse_init_gebsr_laplace2d( \ std::vector & row_ptr, \ std::vector & col_ind, \ std::vector & val, \ int32_t dim_x, \ int32_t dim_y, \ JTYPE & Mb, \ JTYPE & Nb, \ ITYPE & nnzb, \ JTYPE row_block_dim, \ JTYPE col_block_dim, \ rocsparse_index_base base); \ template void rocsparse_init_gebsr_laplace3d( \ std::vector & row_ptr, \ std::vector & col_ind, \ std::vector & val, \ int32_t dim_x, \ int32_t dim_y, \ int32_t dim_z, \ JTYPE & Mb, \ JTYPE & Nb, \ ITYPE & nnzb, \ JTYPE row_block_dim, \ JTYPE col_block_dim, \ rocsparse_index_base base); \ template void rocsparse_init_gebsr_mtx(const char* filename, \ std::vector& bsr_row_ptr, \ std::vector& bsr_col_ind, \ std::vector& bsr_val, \ JTYPE& Mb, \ JTYPE& Nb, \ ITYPE& nnzb, \ JTYPE row_block_dim, \ JTYPE col_block_dim, \ rocsparse_index_base base); \ template void rocsparse_init_gebsr_rocalution( \ const char* filename, \ std::vector& row_ptr, \ std::vector& col_ind, \ std::vector& val, \ JTYPE& Mb, \ JTYPE& Nb, \ ITYPE& nnzb, \ JTYPE row_block_dim, \ JTYPE col_block_dim, \ rocsparse_index_base base); \ template void rocsparse_init_gebsr_rocsparseio( \ const char* filename, \ std::vector& row_ptr, \ std::vector& col_ind, \ std::vector& 
val, \ rocsparse_direction dir, \ JTYPE& Mb, \ JTYPE& Nb, \ ITYPE& nnzb, \ JTYPE row_block_dim, \ JTYPE col_block_dim, \ rocsparse_index_base base); \ template void rocsparse_init_gebsr_random( \ std::vector & row_ptr, \ std::vector & col_ind, \ std::vector & val, \ JTYPE Mb, \ JTYPE Nb, \ ITYPE & nnzb, \ JTYPE row_block_dim, \ JTYPE col_block_dim, \ rocsparse_index_base base, \ rocsparse_matrix_init_kind init_kind, \ bool full_rank, \ bool to_int); \ template void host_csr_to_ell(JTYPE M, \ const std::vector& csr_row_ptr, \ const std::vector& csr_col_ind, \ const std::vector& csr_val, \ std::vector& ell_col_ind, \ std::vector& ell_val, \ JTYPE& ell_width, \ rocsparse_index_base csr_base, \ rocsparse_index_base ell_base); INSTANTIATEI(int32_t); INSTANTIATEI(int64_t); INSTANTIATE(int8_t); INSTANTIATE(int32_t); INSTANTIATE(int64_t); INSTANTIATE(size_t); INSTANTIATE(float); INSTANTIATE(double); INSTANTIATE(rocsparse_float_complex); INSTANTIATE(rocsparse_double_complex); INSTANTIATE1(int32_t, int32_t); INSTANTIATE1(int64_t, int32_t); INSTANTIATE1(int64_t, int64_t); INSTANTIATE2(int32_t, int8_t); INSTANTIATE2(int64_t, int8_t); INSTANTIATE2(int32_t, float); INSTANTIATE2(int64_t, float); INSTANTIATE2(int32_t, double); INSTANTIATE2(int64_t, double); INSTANTIATE2(int32_t, rocsparse_float_complex); INSTANTIATE2(int64_t, rocsparse_float_complex); INSTANTIATE2(int32_t, rocsparse_double_complex); INSTANTIATE2(int64_t, rocsparse_double_complex); INSTANTIATE3(int32_t, int32_t, int8_t); INSTANTIATE3(int64_t, int32_t, int8_t); INSTANTIATE3(int64_t, int64_t, int8_t); INSTANTIATE3(int32_t, int32_t, float); INSTANTIATE3(int64_t, int32_t, float); INSTANTIATE3(int64_t, int64_t, float); INSTANTIATE3(int32_t, int32_t, double); INSTANTIATE3(int64_t, int32_t, double); INSTANTIATE3(int64_t, int64_t, double); INSTANTIATE3(int32_t, int32_t, rocsparse_float_complex); INSTANTIATE3(int64_t, int32_t, rocsparse_float_complex); INSTANTIATE3(int64_t, int64_t, rocsparse_float_complex); 
INSTANTIATE3(int32_t, int32_t, rocsparse_double_complex); INSTANTIATE3(int64_t, int32_t, rocsparse_double_complex); INSTANTIATE3(int64_t, int64_t, rocsparse_double_complex); rocSPARSE-rocm-5.7.1/clients/common/rocsparse_matrix_factory.cpp000066400000000000000000001066021447342677400247630ustar00rootroot00000000000000/*! \file */ /* ************************************************************************ * Copyright (C) 2021-2023 Advanced Micro Devices, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
* * ************************************************************************ */ #include "rocsparse_matrix_factory.hpp" #include "rocsparse_clients_envariables.hpp" #include "rocsparse_init.hpp" static void get_matrix_full_filename(std::string& full_filename_, const std::string& filename_, const std::string& extension_, const bool timing_) { const bool envar_is_defined = rocsparse_clients_envariables::is_defined(rocsparse_clients_envariables::MATRICES_DIR); std::string path = rocsparse_exepath() + "../matrices/"; if(envar_is_defined) { path = rocsparse_clients_envariables::get(rocsparse_clients_envariables::MATRICES_DIR); path += "/"; } full_filename_ = timing_ ? ((envar_is_defined) ? (path + filename_) : filename_) : (path + filename_ + extension_); } // // Destructor. // template rocsparse_matrix_factory::~rocsparse_matrix_factory() { if(this->m_instance) { delete this->m_instance; this->m_instance = nullptr; } } // // Constructor. // template rocsparse_matrix_factory::rocsparse_matrix_factory(const Arguments& arg, rocsparse_matrix_init matrix, bool to_int, // = false bool full_rank, // = false bool noseed // = false ) : m_arg(arg) { // // FORCE REINIT. 
// if(false == noseed) { rocsparse_seedrand(); } switch(matrix) { case rocsparse_matrix_random: { rocsparse_matrix_init_kind matrix_init_kind = arg.matrix_init_kind; this->m_instance = new rocsparse_matrix_factory_random(full_rank, to_int, matrix_init_kind); break; } case rocsparse_matrix_laplace_2d: { this->m_instance = new rocsparse_matrix_factory_laplace2d(arg.dimx, arg.dimy); break; } case rocsparse_matrix_laplace_3d: { this->m_instance = new rocsparse_matrix_factory_laplace3d(arg.dimx, arg.dimy, arg.dimz); break; } case rocsparse_matrix_tridiagonal: { this->m_instance = new rocsparse_matrix_factory_tridiagonal(arg.l, arg.u); break; } case rocsparse_matrix_pentadiagonal: { this->m_instance = new rocsparse_matrix_factory_pentadiagonal(arg.ll, arg.l, arg.u, arg.uu); break; } case rocsparse_matrix_file_rocalution: { std::string full_filename; get_matrix_full_filename(full_filename, arg.filename, ".csr", arg.timing); this->m_instance = new rocsparse_matrix_factory_rocalution(full_filename.c_str(), to_int); break; } case rocsparse_matrix_file_rocsparseio: { std::string full_filename; get_matrix_full_filename(full_filename, arg.filename, ".bin", arg.timing); this->m_instance = new rocsparse_matrix_factory_rocsparseio(full_filename.c_str(), to_int); break; } case rocsparse_matrix_file_mtx: { std::string full_filename; get_matrix_full_filename(full_filename, arg.filename, ".mtx", arg.timing); this->m_instance = new rocsparse_matrix_factory_mtx(full_filename.c_str()); break; } case rocsparse_matrix_zero: { this->m_instance = new rocsparse_matrix_factory_zero(); break; } default: { this->m_instance = nullptr; break; } } assert(this->m_instance != nullptr); } // // Constructor. 
//
// NOTE(review): in this copy of the file every `template` keyword appears without its
// `<...>` parameter list and class names appear without their template arguments.
// The text below is kept token-for-token; confirm against the upstream sources.
//
// Convenience constructor: forwards to the constructor taking an explicit matrix kind,
// using arg.matrix as that kind.
template
rocsparse_matrix_factory::rocsparse_matrix_factory(const Arguments& arg,
                                                   bool             to_int, // = false,
                                                   bool             full_rank, // = false,
                                                   bool             noseed) // = false)
    : rocsparse_matrix_factory(arg, arg.matrix, to_int, full_rank, noseed)
{
}

//
// COO
//

// Forwards to the generator's init_coo, adding matrix_type, uplo and storage from m_arg.
template
void rocsparse_matrix_factory::init_coo(std::vector&         coo_row_ind,
                                        std::vector&         coo_col_ind,
                                        std::vector&         coo_val,
                                        I&                   M,
                                        I&                   N,
                                        int64_t&             nnz,
                                        rocsparse_index_base base)
{
    this->m_instance->init_coo(coo_row_ind,
                               coo_col_ind,
                               coo_val,
                               M,
                               N,
                               nnz,
                               base,
                               this->m_arg.matrix_type,
                               this->m_arg.uplo,
                               this->m_arg.storage);
}

// Fills `that` using the base (baseA) and dimensions (M, N) stored in m_arg as input.
template
void rocsparse_matrix_factory::init_coo(host_coo_matrix& that)
{
    that.base = this->m_arg.baseA;
    that.m    = this->m_arg.M;
    that.n    = this->m_arg.N;
    this->m_instance->init_coo(that.row_ind,
                               that.col_ind,
                               that.val,
                               that.m,
                               that.n,
                               that.nnz,
                               that.base,
                               this->m_arg.matrix_type,
                               this->m_arg.uplo,
                               this->m_arg.storage);
}

// Fills `that` from the requested M, N and base; M and N are overwritten on return with
// the dimensions stored in `that` after generation.
template
void rocsparse_matrix_factory::init_coo(host_coo_matrix&     that,
                                        I&                   M,
                                        I&                   N,
                                        rocsparse_index_base base)
{
    that.base = base;
    that.m    = M;
    that.n    = N;
    this->m_instance->init_coo(that.row_ind,
                               that.col_ind,
                               that.val,
                               that.m,
                               that.n,
                               that.nnz,
                               that.base,
                               this->m_arg.matrix_type,
                               this->m_arg.uplo,
                               this->m_arg.storage);
    M = that.m;
    N = that.n;
}

// Same as the overload above, with the index base taken from m_arg.baseA.
template
void rocsparse_matrix_factory::init_coo(host_coo_matrix& that, I& M, I& N)
{
    this->init_coo(that, M, N, this->m_arg.baseA);
}

//
// CSR
//

// Forwards to the generator's init_csr, adding matrix_type, uplo and storage from m_arg.
template
void rocsparse_matrix_factory::init_csr(std::vector&         csr_row_ptr,
                                        std::vector&         csr_col_ind,
                                        std::vector&         csr_val,
                                        J&                   m,
                                        J&                   n,
                                        I&                   nnz,
                                        rocsparse_index_base base)
{
    this->m_instance->init_csr(csr_row_ptr,
                               csr_col_ind,
                               csr_val,
                               m,
                               n,
                               nnz,
                               base,
                               this->m_arg.matrix_type,
                               this->m_arg.uplo,
                               this->m_arg.storage);
}

// Fills `that` using the base (baseA) and dimensions (M, N) stored in m_arg as input.
template
void rocsparse_matrix_factory::init_csr(host_csr_matrix& that)
{
    that.base = this->m_arg.baseA;
    that.m    = this->m_arg.M;
    that.n    = this->m_arg.N;
    this->m_instance->init_csr(that.ptr,
                               that.ind,
                               that.val,
                               that.m,
                               that.n,
                               that.nnz,
                               that.base,
                               this->m_arg.matrix_type,
                               this->m_arg.uplo,
                               this->m_arg.storage);
}

// Fills `that` from the requested m, n and base; m and n are overwritten on return with
// the dimensions stored in `that` after generation.
template
void rocsparse_matrix_factory::init_csr(host_csr_matrix&     that,
                                        J&                   m,
                                        J&                   n,
                                        rocsparse_index_base base)
{
    that.base = base;
    that.m    = m;
    that.n    = n;
    this->m_instance->init_csr(that.ptr,
                               that.ind,
                               that.val,
                               that.m,
                               that.n,
                               that.nnz,
                               that.base,
                               this->m_arg.matrix_type,
                               this->m_arg.uplo,
                               this->m_arg.storage);
    m = that.m;
    n = that.n;
}

// Same as the overload above, with the index base taken from m_arg.baseA.
template
void rocsparse_matrix_factory::init_csr(host_csr_matrix& that, J& m, J& n)
{
    this->init_csr(that, m, n, this->m_arg.baseA);
}

//
// CSC
//

// Produces the CSC arrays by calling the generator's init_csr with the two dimensions
// swapped (N is passed where init_csr expects the row count, M the column count).
template
void rocsparse_matrix_factory::init_csc(std::vector&         csc_col_ptr,
                                        std::vector&         csc_row_ind,
                                        std::vector&         csc_val,
                                        J&                   M,
                                        J&                   N,
                                        I&                   nnz,
                                        rocsparse_index_base base)
{
    this->m_instance->init_csr(csc_col_ptr,
                               csc_row_ind,
                               csc_val,
                               N,
                               M,
                               nnz,
                               base,
                               this->m_arg.matrix_type,
                               this->m_arg.uplo,
                               this->m_arg.storage);
}

// Same swap as above, writing into `that`. m and n are passed by reference to init_csr
// (as n, m) and the resulting values are stored in that.m / that.n afterwards.
template
void rocsparse_matrix_factory::init_csc(host_csc_matrix&     that,
                                        J&                   m,
                                        J&                   n,
                                        rocsparse_index_base base)
{
    that.base = base;
    this->m_instance->init_csr(that.ptr,
                               that.ind,
                               that.val,
                               n,
                               m,
                               that.nnz,
                               that.base,
                               this->m_arg.matrix_type,
                               this->m_arg.uplo,
                               this->m_arg.storage);
    that.m = m;
    that.n = n;
}

//
// GEBSR
//

// Forwards to the generator's init_gebsr, adding matrix_type, uplo and storage from m_arg.
template
void rocsparse_matrix_factory::init_gebsr(std::vector&         bsr_row_ptr,
                                          std::vector&         bsr_col_ind,
                                          std::vector&         bsr_val,
                                          rocsparse_direction  dirb,
                                          J&                   Mb,
                                          J&                   Nb,
                                          I&                   nnzb,
                                          J&                   row_block_dim,
                                          J&                   col_block_dim,
                                          rocsparse_index_base base)
{
    this->m_instance->init_gebsr(bsr_row_ptr,
                                 bsr_col_ind,
                                 bsr_val,
                                 dirb,
                                 Mb,
                                 Nb,
                                 nnzb,
                                 row_block_dim,
                                 col_block_dim,
                                 base,
                                 this->m_arg.matrix_type,
                                 this->m_arg.uplo,
                                 this->m_arg.storage);
}

// Fills `that` from the caller supplied direction, sizes and base. The sizes
// (mb_, nb_, nnzb_, row_block_dim_, col_block_dim_) are overwritten on return with the
// values stored in `that` after generation.
template
void rocsparse_matrix_factory::init_gebsr(host_gebsr_matrix&   that,
                                          rocsparse_direction  block_dir_,
                                          J&                   mb_,
                                          J&                   nb_,
                                          I&                   nnzb_,
                                          J&                   row_block_dim_,
                                          J&                   col_block_dim_,
                                          rocsparse_index_base base_)
{
    that.block_direction = block_dir_;
    that.mb              = mb_;
    that.nb              = nb_;
    that.row_block_dim   = row_block_dim_;
    that.col_block_dim   = col_block_dim_;
    that.base            = base_;
    that.nnzb            = nnzb_;
    this->m_instance->init_gebsr(that.ptr,
                                 that.ind,
                                 that.val,
                                 that.block_direction,
                                 that.mb,
                                 that.nb,
                                 that.nnzb,
                                 that.row_block_dim,
                                 that.col_block_dim,
                                 that.base,
                                 this->m_arg.matrix_type,
                                 this->m_arg.uplo,
                                 this->m_arg.storage);

    mb_            = that.mb;
    nb_            = that.nb;
    nnzb_          = that.nnzb;
    row_block_dim_ = that.row_block_dim;
    col_block_dim_ = that.col_block_dim;
}

// Fills `that` using only values stored in m_arg (direction, M, N, nnz, row_block_dimA,
// col_block_dimA, baseA) as input.
template
void rocsparse_matrix_factory::init_gebsr(host_gebsr_matrix& that)
{
    that.block_direction = this->m_arg.direction;
    that.mb              = this->m_arg.M;
    that.nb              = this->m_arg.N;
    that.nnzb            = this->m_arg.nnz;
    that.row_block_dim   = this->m_arg.row_block_dimA;
    that.col_block_dim   = this->m_arg.col_block_dimA;
    that.base            = this->m_arg.baseA;
    this->m_instance->init_gebsr(that.ptr,
                                 that.ind,
                                 that.val,
                                 that.block_direction,
                                 that.mb,
                                 that.nb,
                                 that.nnzb,
                                 that.row_block_dim,
                                 that.col_block_dim,
                                 that.base,
                                 this->m_arg.matrix_type,
                                 this->m_arg.uplo,
                                 this->m_arg.storage);
}

// Fills `that` from the caller supplied block sizes and base; nnzb is seeded from
// m_arg.nnz. mb, nb, row_block_dim and col_block_dim are overwritten on return.
// NOTE(review): unlike the two overloads above, that.block_direction is not assigned
// here, so whatever value `that` already holds is passed to the generator - confirm
// this is intended.
template
void rocsparse_matrix_factory::init_gebsr(host_gebsr_matrix&   that,
                                          J&                   mb,
                                          J&                   nb,
                                          J&                   row_block_dim,
                                          J&                   col_block_dim,
                                          rocsparse_index_base base_)
{
    that.base          = base_;
    that.mb            = mb;
    that.nb            = nb;
    that.nnzb          = this->m_arg.nnz;
    that.row_block_dim = row_block_dim;
    that.col_block_dim = col_block_dim;
    this->m_instance->init_gebsr(that.ptr,
                                 that.ind,
                                 that.val,
                                 that.block_direction,
                                 that.mb,
                                 that.nb,
                                 that.nnzb,
                                 that.row_block_dim,
                                 that.col_block_dim,
                                 that.base,
                                 this->m_arg.matrix_type,
                                 this->m_arg.uplo,
                                 this->m_arg.storage);

    mb            = that.mb;
    nb            = that.nb;
    row_block_dim = that.row_block_dim;
    col_block_dim = that.col_block_dim;
}

// Builds a GEBSR matrix whose block pattern is a generated CSR matrix (one CSR entry per
// block) and whose values are the running counter 1, 2, 3, ...
// The counter advances in the loop order: block row i, row r inside the block, block k of
// that block row, column c inside the block. The two direction cases differ only in where
// the (r, c) element is stored inside its block.
// Direction, base and block dimensions come from m_arg; Mb and Nb are passed by reference
// to init_csr.
template
void rocsparse_matrix_factory::init_gebsr_spezial(host_gebsr_matrix& that, J& Mb, J& Nb)
{
    I idx = 0;

    host_csr_matrix hA;
    rocsparse_direction  direction     = this->m_arg.direction;
    rocsparse_index_base base          = this->m_arg.baseA;
    J                    row_block_dim = this->m_arg.row_block_dimA;
    J                    col_block_dim = this->m_arg.col_block_dimA;

    // Sparsity pattern of the blocks.
    this->init_csr(hA, Mb, Nb, base);

    that.define(direction, Mb, Nb, hA.nnz, row_block_dim, col_block_dim, base);

    switch(direction)
    {
    case rocsparse_direction_column:
    {
        T*       val    = that.val;
        const I* hA_ptr = hA.ptr.data();
        for(J i = 0; i < Mb; ++i)
        {
            for(J r = 0; r < row_block_dim; ++r)
            {
                for(I k = hA_ptr[i] - base; k < hA_ptr[i + 1] - base; ++k)
                {
                    for(J c = 0; c < col_block_dim; ++c)
                    {
                        // Element (r, c) of block k at offset c * row_block_dim + r.
                        val[k * row_block_dim * col_block_dim + c * row_block_dim + r]
                            = static_cast(++idx);
                    }
                }
            }
        }
        break;
    }

    case rocsparse_direction_row:
    {
        T*       val    = that.val;
        const I* hA_ptr = hA.ptr.data();
        for(J i = 0; i < Mb; ++i)
        {
            for(J r = 0; r < row_block_dim; ++r)
            {
                for(I k = hA_ptr[i] - base; k < hA_ptr[i + 1] - base; ++k)
                {
                    for(J c = 0; c < col_block_dim; ++c)
                    {
                        // Element (r, c) of block k at offset r * col_block_dim + c.
                        val[k * row_block_dim * col_block_dim + r * col_block_dim + c]
                            = static_cast(++idx);
                    }
                }
            }
        }
        break;
    }
    }

    // The block pointer and index arrays are taken over from the CSR pattern.
    that.ptr.transfer_from(hA.ptr);
    that.ind.transfer_from(hA.ind);
}

// Produces the GEBSC arrays by calling the generator's init_gebsr with Mb and Nb swapped.
template
void rocsparse_matrix_factory::init_gebsc(std::vector&         bsc_col_ptr,
                                          std::vector&         bsc_row_ind,
                                          std::vector&         bsc_val,
                                          rocsparse_direction  dirb,
                                          J&                   Mb,
                                          J&                   Nb,
                                          I&                   nnzb,
                                          J&                   row_block_dim,
                                          J&                   col_block_dim,
                                          rocsparse_index_base base)
{
    this->m_instance->init_gebsr(bsc_col_ptr,
                                 bsc_row_ind,
                                 bsc_val,
                                 dirb,
                                 Nb,
                                 Mb,
                                 nnzb,
                                 row_block_dim,
                                 col_block_dim,
                                 base,
                                 this->m_arg.matrix_type,
                                 this->m_arg.uplo,
                                 this->m_arg.storage);
}

//
// BSR
//

// BSR is generated as GEBSR with block_dim used for both the row and column block size.
template
void rocsparse_matrix_factory::init_bsr(std::vector&         bsr_row_ptr,
                                        std::vector&         bsr_col_ind,
                                        std::vector&         bsr_val,
                                        rocsparse_direction  dirb,
                                        J&                   Mb,
                                        J&                   Nb,
                                        I&                   nnzb,
                                        J&                   block_dim,
                                        rocsparse_index_base base)
{
    this->m_instance->init_gebsr(bsr_row_ptr,
                                 bsr_col_ind,
                                 bsr_val,
                                 dirb,
                                 Mb,
                                 Nb,
                                 nnzb,
                                 block_dim,
                                 block_dim,
                                 base,
                                 this->m_arg.matrix_type,
                                 this->m_arg.uplo,
                                 this->m_arg.storage);
}

// Fallback used when the enable_if specialization below is not selected: prints a message
// to std::cout and throws rocsparse_status_internal_error.
template
struct traits_init_bsr
{
    static void init(rocsparse_matrix_factory& factory,
                     host_gebsr_matrix&        that,
                     device_gebsr_matrix&      that_on_device,
                     J&                        mb_,
                     J&                        nb_,
                     rocsparse_index_base      base_)
    {
        std::cout << "default traits_init_bsr not implemented (file: " << __FILE__ << ")"
                  << std::endl;
        throw rocsparse_status_internal_error;
    }
};

// Working implementation, selected through the enable_if condition (its operands are
// missing in this copy - see the note at the top of this section).
// Steps: generate a sorted, general CSR matrix of size (mb_ * block_dim) x (nb_ * block_dim),
// compress and convert it to BSR through rocsparse_matrix_utils on the device, copy the
// result into `that`, and report the resulting mb / nb back through mb_ / nb_.
// If m_arg.storage asks for unsorted storage, the host copy is unsorted afterwards and
// copied back to that_on_device.
template
struct traits_init_bsr<
    T,
    I,
    J,
    std::enable_if_t{} && std::is_same{}>>
{
    static void init(rocsparse_matrix_factory& factory,
                     host_gebsr_matrix&        that,
                     device_gebsr_matrix&      that_on_device,
                     J&                        mb_,
                     J&                        nb_,
                     rocsparse_index_base      base_)
    {
        //
        // Initialize in case init_csr requires it as input.
        //
        rocsparse_int block_dim = factory.m_arg.block_dim;
        rocsparse_int M         = mb_ * block_dim;
        rocsparse_int N         = nb_ * block_dim;

        host_csr_matrix hA_uncompressed;

        // Always generate sorted CSR matrix as convert routine requires CSR matrix to be sorted
        hA_uncompressed.define(M, N, 0, base_);
        factory.init_csr(hA_uncompressed.ptr,
                         hA_uncompressed.ind,
                         hA_uncompressed.val,
                         hA_uncompressed.m,
                         hA_uncompressed.n,
                         hA_uncompressed.nnz,
                         hA_uncompressed.base,
                         rocsparse_matrix_type_general,
                         rocsparse_fill_mode_lower,
                         rocsparse_storage_mode_sorted);

        // Scoped so that the temporary device CSR matrices go out of scope after conversion.
        {
            device_csr_matrix dA_uncompressed(hA_uncompressed);
            device_csr_matrix dA_compressed;
            rocsparse_matrix_utils::compress(dA_compressed, dA_uncompressed, base_);
            rocsparse_matrix_utils::convert(dA_compressed,
                                            factory.m_arg.direction,
                                            block_dim,
                                            base_,
                                            rocsparse_storage_mode_sorted,
                                            that_on_device);
        }

        // Copy the device result to the host matrix.
        that(that_on_device);

        mb_ = that.mb;
        nb_ = that.nb;

        switch(factory.m_arg.storage)
        {
        case rocsparse_storage_mode_unsorted:
        {
            rocsparse_matrix_utils::host_gebsrunsort(
                that.ptr.data(), that.ind.data(), that.mb, that.base);
            // Keep the device copy consistent with the unsorted host copy.
            that_on_device(that);
            break;
        }
        case rocsparse_storage_mode_sorted:
        {
            break;
        }
        }
    };
};

// Host-only variant: uses a local device matrix that is discarded on return.
template
void rocsparse_matrix_factory::init_bsr(host_gebsr_matrix&   that_,
                                        J&                   mb_,
                                        J&                   nb_,
                                        rocsparse_index_base base_)
{
    device_gebsr_matrix dB;
    this->init_bsr(that_, dB, mb_, nb_, base_);
}

// Fills both the host and the device matrix; dispatches to traits_init_bsr.
template
void rocsparse_matrix_factory::init_bsr(host_gebsr_matrix&   that_,
                                        device_gebsr_matrix& that_on_device_,
                                        J&                   mb_,
                                        J&                   nb_,
                                        rocsparse_index_base base_)
{
    traits_init_bsr::init(*this, that_, that_on_device_, mb_, nb_, base_);
}

//
// COO AOS
//

// Fallback: reports "non reachable" with the line number on std::cerr and exits with
// status 1.
template
struct traits_init_coo_aos
{
    static void init(rocsparse_matrix_factory& factory,
                     host_coo_aos_matrix&      that,
                     I&                        M,
                     I&                        N,
                     rocsparse_index_base      base)
    {
        std::cerr << "non reachable " << __LINE__ << std::endl;
        exit(1);
    };
};

// Working implementation: generates a CSR matrix, converts its pointer/index arrays with
// host_csr_to_coo_aos into that.ind, and takes over the CSR values.
template
struct traits_init_coo_aos{}>>
{
    static void init(rocsparse_matrix_factory& factory,
                     host_coo_aos_matrix&      that,
                     I&                        M,
                     I&                        N,
                     rocsparse_index_base      base)
    {
        host_csr_matrix hA;
        factory.init_csr(hA, M, N, base);
        that.define(hA.m, hA.n, hA.nnz, hA.base);
        host_csr_to_coo_aos(hA.m, hA.nnz, hA.ptr, hA.ind, that.ind, hA.base);
        that.val.transfer_from(hA.val);
    };
};

// Dispatches to traits_init_coo_aos.
template
void rocsparse_matrix_factory::init_coo_aos(host_coo_aos_matrix& that,
                                            I&                   M,
                                            I&                   N,
                                            rocsparse_index_base base)
{
    traits_init_coo_aos::init(*this, that, M, N, base);
}

//
// ELL
//

// Fallback: reports "non reachable" with the line number on std::cerr and exits with
// status 1.
template
struct traits_init_ell
{
    static void init(rocsparse_matrix_factory& factory,
                     host_ell_matrix&          that,
                     I&                        M,
                     I&                        N,
                     rocsparse_index_base      base)
    {
        std::cerr << "non reachable " << __LINE__ << std::endl;
        exit(1);
    };
};

// Working implementation: generates a CSR matrix and converts it with host_csr_to_ell,
// which also fills that.width; nnz is then set to width * m.
template
struct traits_init_ell{}>>
{
    static void init(rocsparse_matrix_factory& factory,
                     host_ell_matrix&          that,
                     I&                        M,
                     I&                        N,
                     rocsparse_index_base      base)
    {
        host_csr_matrix hA;
        factory.init_csr(hA, M, N, base);
        that.define(hA.m, hA.n, 0, hA.base);
        host_csr_to_ell(
            hA.m, hA.ptr, hA.ind, hA.val, that.ind, that.val, that.width, hA.base, that.base);
        // Widened before multiplying so the product is computed in 64 bits.
        that.nnz = (int64_t)that.width * that.m;
    };
};

// Dispatches to traits_init_ell.
template
void rocsparse_matrix_factory::init_ell(host_ell_matrix&     that,
                                        I&                   M,
                                        I&                   N,
                                        rocsparse_index_base base)
{
    traits_init_ell::init(*this, that, M, N, base);
}

//
// HYB
//

// Fallback: reports "non reachable" with the line number on std::cerr and exits with
// status 1.
template
struct traits_init_hyb
{
    static void init(rocsparse_matrix_factory& factory,
                     rocsparse_hyb_mat         that,
                     I&                        M,
                     I&                        N,
                     I&                        nnz,
                     rocsparse_index_base      base,
                     bool&                     conform)
    {
        std::cerr << "non reachable " << __LINE__ << std::endl;
        exit(1);
    };
};

// Working implementation: generates a CSR matrix, copies it to the device and converts it
// to HYB with rocsparse_csr2hyb, using the partition (m_arg.part) and user ELL width
// (m_arg.algo) stored in the factory arguments.
// `conform` is set to false - and no conversion is done - when the partition is
// rocsparse_hyb_partition_max and the longest row exceeds the width limit computed below;
// otherwise it is true. nnz receives the number of non-zeros of the generated CSR matrix.
// NOTE(review): width_limit and the user width both divide by M; M == 0 is not guarded
// here - confirm callers never reach this with an empty matrix.
// NOTE(review): handle and descr are destroyed only on the straight-line path; if
// CHECK_ROCSPARSE_THROW_ERROR leaves the function early (as its name suggests) they are
// presumably not released - confirm.
template
struct traits_init_hyb{} && std::is_same{}>>
{
    static void init(rocsparse_matrix_factory& factory,
                     rocsparse_hyb_mat         that,
                     I&                        M,
                     I&                        N,
                     I&                        nnz,
                     rocsparse_index_base      base,
                     bool&                     conform)
    {
        conform                                = true;
        rocsparse_hyb_partition part           = factory.m_arg.part;
        rocsparse_int           user_ell_width = factory.m_arg.algo;

        host_csr_matrix hA;
        factory.init_csr(hA, M, N, base);
        nnz = hA.nnz;

        // ELL width limit
        rocsparse_int width_limit = 2 * (hA.nnz - 1) / M + 1;

        // Limit ELL user width
        if(part == rocsparse_hyb_partition_user)
        {
            // The requested width is interpreted as a multiple of nnz / M.
            user_ell_width *= (hA.nnz / M);
            user_ell_width = std::min(width_limit, user_ell_width);
        }

        if(part == rocsparse_hyb_partition_max)
        {
            // Compute max ELL width
            rocsparse_int ell_max_width = 0;
            for(rocsparse_int i = 0; i < M; ++i)
            {
                ell_max_width = std::max(hA.ptr[i + 1] - hA.ptr[i], ell_max_width);
            }

            if(ell_max_width > width_limit)
            {
                conform = false;
                return;
            }
        }

        device_csr_matrix dA(hA);

        rocsparse_handle handle;
        CHECK_ROCSPARSE_THROW_ERROR(rocsparse_create_handle(&handle));
        rocsparse_mat_descr descr;
        CHECK_ROCSPARSE_THROW_ERROR(rocsparse_create_mat_descr(&descr));
        // Set matrix index base
        CHECK_ROCSPARSE_THROW_ERROR(rocsparse_set_mat_index_base(descr, base));

        // Convert CSR matrix to HYB
        CHECK_ROCSPARSE_THROW_ERROR(rocsparse_csr2hyb(
            handle, M, N, descr, dA.val, dA.ptr, dA.ind, that, user_ell_width, part));

        CHECK_ROCSPARSE_THROW_ERROR(rocsparse_destroy_mat_descr(descr));
        CHECK_ROCSPARSE_THROW_ERROR(rocsparse_destroy_handle(handle));
    };
};

// Dispatches to traits_init_hyb.
template
void rocsparse_matrix_factory::init_hyb(
    rocsparse_hyb_mat that, I& M, I& N, I& nnz, rocsparse_index_base base, bool& conform)
{
    traits_init_hyb::init(*this, that, M, N, nnz, base, conform);
}

//
// INSTANTIATE.
//
// Explicit instantiations of rocsparse_matrix_factory.
// NOTE(review): the template argument lists are missing in this copy of the file, so the
// fifteen lines below read identically; restore the arguments from the upstream sources.
template struct rocsparse_matrix_factory;
template struct rocsparse_matrix_factory;
template struct rocsparse_matrix_factory;
template struct rocsparse_matrix_factory;
template struct rocsparse_matrix_factory;
template struct rocsparse_matrix_factory;
template struct rocsparse_matrix_factory;
template struct rocsparse_matrix_factory;
template struct rocsparse_matrix_factory;
template struct rocsparse_matrix_factory;
template struct rocsparse_matrix_factory;
template struct rocsparse_matrix_factory;
template struct rocsparse_matrix_factory;
template struct rocsparse_matrix_factory;
template struct rocsparse_matrix_factory;

// NOTE(review): the next line is not C++; it looks like the tar archive member header that
// marks the start of clients/common/rocsparse_matrix_factory_file.cpp.
rocSPARSE-rocm-5.7.1/clients/common/rocsparse_matrix_factory_file.cpp000066400000000000000000000654731447342677400257740ustar00rootroot00000000000000
/*! \file */
/* ************************************************************************
 * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 * ************************************************************************ */

#include "rocsparse_matrix_factory_file.hpp"
#include "rocsparse_import.hpp"
#include "rocsparse_importer_impls.hpp"
#include "rocsparse_matrix_utils.hpp"

// Replaces every element of `data` with its absolute value (std::abs), in place.
// `data` only needs size() and operator[].
// NOTE(review): the template parameter list is truncated in this copy (only
// `class VECTOR>` survives) - restore from the upstream sources.
template class VECTOR>
static void apply_toint(VECTOR& data)
{
    const size_t size = data.size();
    for(size_t i = 0; i < size; ++i)
    {
        data[i] = std::abs(data[i]);
    }
}

template