pax_global_header00006660000000000000000000000064145456062360014525gustar00rootroot0000000000000052 comment=7b59f24005e1ae1c4d7a8069c00d94d92717fe86 rocminfo-rocm-6.1.2/000077500000000000000000000000001454560623600143055ustar00rootroot00000000000000rocminfo-rocm-6.1.2/CMakeLists.txt000077500000000000000000000167171454560623600170640ustar00rootroot00000000000000# # GCC 4.8 or higher compiler required. # # Required Defines on cmake command line # # 1) Set location of ROCR header files (required) # # ROCM_DIR="Root for RocM install" # # 2) Set ROCRTST_BLD_TYPE to either "Debug" or "Release". # If not set, the default value is "Debug" is bound. # # ROCRTST_BLD_TYPE=Debug or ROCRTST_BLD_TYPE=Release # # 3) Set ROCRTST_BLD_BITS to either "32" or "64" # If not set, the default value of "64" is bound. # # ROCRTST_BLD_BITS=32 or ROCRTST_BLD_BITS=64 # # Building rocminfo # # 1) Create build folder e.g. "rocminfo/build" - any name will do # 2) Cd into build folder # 3) Run cmake, passing in the above defines, as needed/required: # "cmake -DROCM_DIR= .." # 4) Run "make" # # Upon a successful build, the executable "rocminfo" will be in the # build directory. 
#
# Windows is currently not supported.
#

#
# Minimum version of cmake required
#
cmake_minimum_required(VERSION 3.6.3)

# The executable name doubles as the project name.
set(ROCMINFO_EXE "rocminfo")
set(PROJECT_NAME ${ROCMINFO_EXE})
project(${PROJECT_NAME})

include(GNUInstallDirs)

# Stop configuring immediately when targeting Windows.
if(WIN32)
  message("This sample is not supported on Windows platform")
  return()
endif()

## Fall back to the bundled cmake_modules directory unless the caller has
## already supplied a module path.
if(NOT DEFINED CMAKE_MODULE_PATH)
  set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules/")
endif()

## Pull in the shared helper functions (utils.cmake).
include(utils)

#
# Process input variables
#
find_package(hsa-runtime64 1.0 REQUIRED)

# ROCRTST_BLD_TYPE is compared case-insensitively. Any value other than
# "release" (including an unset variable) selects a Debug build.
string(TOLOWER "${ROCRTST_BLD_TYPE}" tmp)
if(NOT "${tmp}" STREQUAL "release")
  set(BUILD_TYPE "Debug")
  set(ISDEBUG 1)
else()
  set(BUILD_TYPE "Release")
  set(ISDEBUG 0)
endif()

# Default package version components (1.0.0, zero commits).
# Update MAJOR and MINOR for ABI breaks, and MINOR for ABI/API additions.
# These defaults are intended to be superseded by git tags (through
# "git describe") when such tags are present.
set(PKG_VERSION_MAJOR 1)
set(PKG_VERSION_MINOR 0)
set(PKG_VERSION_PATCH 0)
set(PKG_VERSION_NUM_COMMIT 0)

################# Determine the library version #########################
## Setup the package version based on git tags.
set(PKG_VERSION_GIT_TAG_PREFIX "rocminfo_pkg_ver")

# GIT holds the full path to git, or GIT-NOTFOUND when it is not installed.
find_program(GIT NAMES git)

# Pass the *value* of GIT (not the literal word "GIT") so that the helper's
# if(GIT) test is false when git was not found. A literal "GIT" is always
# truthy and made the helper run git even when it is missing.
get_package_version_number("1.0.0" "${PKG_VERSION_GIT_TAG_PREFIX}" "${GIT}")
# get_package_version_number() exports only PKG_VERSION_STR to this scope.
message("Package version: ${PKG_VERSION_STR}")

#
# Print out the build configuration being used:
#
#   Build Src directory
#   Build Binary directory
#   Build Type: Debug Vs Release, 32 Vs 64
#   Compiler Version, etc
#
message("")
message("Build Configuration:")
message("-----------BuildType: ${BUILD_TYPE}")
message("------------Compiler: ${CMAKE_CXX_COMPILER}")
message("-------------Version: ${CMAKE_CXX_COMPILER_VERSION}")
message("--------Proj Src Dir: ${PROJECT_SOURCE_DIR}")
message("--------Proj Bld Dir: ${PROJECT_BINARY_DIR}")
message("--------Proj Lib Dir: ${PROJECT_BINARY_DIR}/lib")
message("--------Proj Exe Dir: ${PROJECT_BINARY_DIR}/bin")
message("")

#
# Set the build type based on user input
# (BUILD_TYPE is derived from ROCRTST_BLD_TYPE earlier in this file)
#
set(CMAKE_BUILD_TYPE ${BUILD_TYPE})

#
# Flag to enable / disable verbose output.
#
set(CMAKE_VERBOSE_MAKEFILE on)

#
# Compiler pre-processor definitions.
#

#
# Linux Compiler options
#
set(ROCMINFO_CXX_FLAGS
  -std=c++11
  -fexceptions
  -fno-rtti
  -fno-math-errno
  -fno-threadsafe-statics
  -fmerge-all-constants
  -fms-extensions
  -Werror
  -Wall
  -m64
)

#
# Extend the compiler flags for 64-bit x86 builds
# NOTE(review): this tests the *host* processor; a cross build would probably
# want CMAKE_SYSTEM_PROCESSOR instead - confirm before changing.
#
if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64)$")
  list(APPEND ROCMINFO_CXX_FLAGS -msse -msse2)
endif()

#
# Add compiler flags to include symbol information for debug builds
#
if(ISDEBUG)
  list(APPEND ROCMINFO_CXX_FLAGS -ggdb -O0)
endif()

###########################
# rocm_agent_enumerator
###########################
# Copy the script unmodified into the build tree. Relative paths are resolved
# against the current source dir (input) and current binary dir (output).
configure_file(rocm_agent_enumerator rocm_agent_enumerator COPYONLY)

###########################
# RocR Info
###########################
aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR} ROCMINFO_SOURCES)
add_executable(${ROCMINFO_EXE} ${ROCMINFO_SOURCES})
target_link_libraries(${ROCMINFO_EXE} PRIVATE hsa-runtime64::hsa-runtime64)
target_compile_options(${ROCMINFO_EXE} PRIVATE ${ROCMINFO_CXX_FLAGS})

# Pre-processor definitions, scoped to the executable instead of the whole
# directory. DEBUG is defined only when the build type is "Debug"; the
# comparison is quoted so an empty BUILD_TYPE cannot break the if().
target_compile_definitions(${ROCMINFO_EXE} PRIVATE LITTLEENDIAN_CPU=1)
if("${BUILD_TYPE}" STREQUAL "Debug")
  target_compile_definitions(${ROCMINFO_EXE} PRIVATE DEBUG)
endif()

###########################
# Install directives
###########################
install(TARGETS ${ROCMINFO_EXE} DESTINATION ${CMAKE_INSTALL_BINDIR})
install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/rocm_agent_enumerator
        DESTINATION ${CMAKE_INSTALL_BINDIR})

###########################
# Packaging directives
###########################
set(CPACK_PACKAGE_NAME "${PROJECT_NAME}")
set(CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc.")
set(CPACK_PACKAGE_VERSION_MAJOR "${PKG_VERSION_MAJOR}")
set(CPACK_PACKAGE_VERSION_MINOR "${PKG_VERSION_MINOR}")
set(CPACK_PACKAGE_VERSION_PATCH "${PKG_VERSION_PATCH}")
set(CPACK_PACKAGE_CONTACT "AMD Rocminfo Support ")
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/License.txt")
set(CPACK_RPM_PACKAGE_LICENSE "NCSA")
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Radeon Open Compute (ROCm) Runtime rocminfo tool")

# Install license file
install(FILES ${CPACK_RESOURCE_FILE_LICENSE} DESTINATION ${CMAKE_INSTALL_DOCDIR})

# Version suffix appended to the package version.
# Defaults to 99999; the ROCM_LIBPATCH_VERSION environment variable, read at
# configure time, overrides it.
set(ROCM_VERSION_FOR_PACKAGE "99999")
if(DEFINED ENV{ROCM_LIBPATCH_VERSION})
  set(ROCM_VERSION_FOR_PACKAGE $ENV{ROCM_LIBPATCH_VERSION})
endif()

# Debian package specific variables
set(CPACK_DEBIAN_PACKAGE_DEPENDS "hsa-rocr, kmod, pciutils")
# The cache signature is set(<var> <value> CACHE <type> <docstring>). The URL
# was previously passed as the docstring, which left the homepage empty. A
# value supplied with -D on the command line still takes precedence.
set(CPACK_DEBIAN_PACKAGE_HOMEPAGE "https://github.com/RadeonOpenCompute/ROCm"
    CACHE STRING "Homepage URL recorded in the Debian package")
if(DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
  set(CPACK_DEBIAN_PACKAGE_RELEASE $ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
else()
  set(CPACK_DEBIAN_PACKAGE_RELEASE "local")
endif()
if(ROCM_DEP_ROCMCORE)
  string(APPEND CPACK_DEBIAN_PACKAGE_DEPENDS ", rocm-core")
endif()

# RPM package specific variables
set(CPACK_RPM_PACKAGE_REQUIRES "hsa-rocr kmod pciutils")
if(DEFINED CPACK_PACKAGING_INSTALL_PREFIX)
  # NOTE(review): this is one space-separated string; CPack documents the
  # variable as a list of paths - confirm whether a ;-list was intended.
  set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "${CPACK_PACKAGING_INSTALL_PREFIX} ${CPACK_PACKAGING_INSTALL_PREFIX}/${CMAKE_INSTALL_BINDIR}")
endif()
if(DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE})
  set(CPACK_RPM_PACKAGE_RELEASE $ENV{CPACK_RPM_PACKAGE_RELEASE})
else()
  set(CPACK_RPM_PACKAGE_RELEASE "local")
endif()
if(ROCM_DEP_ROCMCORE)
  string(APPEND CPACK_RPM_PACKAGE_REQUIRES " rocm-core")
endif()

# Set rpm distro
if(CPACK_RPM_PACKAGE_RELEASE)
  set(CPACK_RPM_PACKAGE_RELEASE_DIST ON)
endif()

# Prepare final version for the CPACK use
set(CPACK_PACKAGE_VERSION
"${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}.${ROCM_VERSION_FOR_PACKAGE}") #Set the names now using CPACK utility set(CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT") set(CPACK_RPM_FILE_NAME "RPM-DEFAULT") include ( CPack ) rocminfo-rocm-6.1.2/License.txt000066400000000000000000000033251454560623600164330ustar00rootroot00000000000000The University of Illinois/NCSA Open Source License (NCSA) Copyright (c) 2014-2017, Advanced Micro Devices, Inc. All rights reserved. Developed by: AMD Research and AMD HSA Software Development Advanced Micro Devices, Inc. www.amd.com Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal with the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimers. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimers in the documentation and/or other materials provided with the distribution. - Neither the names of Advanced Micro Devices, Inc, nor the names of its contributors may be used to endorse or promote products derived from this Software without specific prior written permission. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. rocminfo-rocm-6.1.2/README.md000066400000000000000000000033771454560623600155760ustar00rootroot00000000000000# rocminfo ROCm Application for Reporting System Info ## To Build Use the standard cmake build procedure to build rocminfo. The location of ROCM root (parent directory containing ROCM headers and libraries) must be provided as a cmake argument using the standard CMAKE_PREFIX_PATH cmake variable. After cloning the rocminfo git repo, please make sure to do a git-fetch --tags to get the tags residing on the repo. These tags are used for versioning. For example, $ git fetch --tags origin Building from the CMakeLists.txt directory might look like this: mkdir -p build cd build cmake -DCMAKE_PREFIX_PATH=/opt/rocm .. make cd .. Upon a successful build the binary, rocminfo, and the python script, rocm_agent_enumerator, will be in the build folder. ## Execution "rocminfo" gives information about the HSA system attributes and agents. "rocm_agent_enumerator" prints the list of available AMD GCN ISA or architecture names. With the option '-name', it prints out available architectures names obtained from rocminfo. Otherwise, it generates ISA in one of five different ways: 1. ROCM_TARGET_LST : a user defined environment variable, set to the path and filename where to find the "target.lst" file. This can be used in an install environment with sandbox, where execution of "rocminfo" is not possible. 2. target.lst : user-supplied text file, in the same folder as "rocm_agent_enumerator". This is used in a container setting where ROCm stack may usually not available. 3. HSA topology : gathers the information from the HSA node topology in /sys/class/kfd/kfd/topology/nodes/ 4. 
lspci : enumerate PCI bus and locate supported devices from a hard-coded lookup table. 5. rocminfo : a tool shipped with this script to enumerate GPU agents available on a working ROCm stack. rocminfo-rocm-6.1.2/cmake_modules/000077500000000000000000000000001454560623600171155ustar00rootroot00000000000000rocminfo-rocm-6.1.2/cmake_modules/utils.cmake000077500000000000000000000145611454560623600212710ustar00rootroot00000000000000################################################################################ ## ## The University of Illinois/NCSA ## Open Source License (NCSA) ## ## Copyright (c) 2014-2017, Advanced Micro Devices, Inc. All rights reserved. ## ## Developed by: ## ## AMD Research and AMD HSA Software Development ## ## Advanced Micro Devices, Inc. ## ## www.amd.com ## ## Permission is hereby granted, free of charge, to any person obtaining a copy ## of this software and associated documentation files (the "Software"), to ## deal with the Software without restriction, including without limitation ## the rights to use, copy, modify, merge, publish, distribute, sublicense, ## and#or sell copies of the Software, and to permit persons to whom the ## Software is furnished to do so, subject to the following conditions: ## ## - Redistributions of source code must retain the above copyright notice, ## this list of conditions and the following disclaimers. ## - Redistributions in binary form must reproduce the above copyright ## notice, this list of conditions and the following disclaimers in ## the documentation and#or other materials provided with the distribution. ## - Neither the names of Advanced Micro Devices, Inc, ## nor the names of its contributors may be used to endorse or promote ## products derived from this Software without specific prior written ## permission. 
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS WITH THE SOFTWARE.
##
################################################################################

## Parses VERSION_STRING and places the first, second and third runs of
## decimal digits found in it into VERSION_MAJOR, VERSION_MINOR and
## VERSION_PATCH in the caller's scope.
##
## Also overwrites VERSION_STRING in the caller's scope with the dotted form
## of the numbers that were found ("major", "major.minor" or
## "major.minor.patch"; empty when the input contains no digits).
## Components that are not present in the input are left untouched.
##
## Example: parse_version("1.2.3")
function(parse_version VERSION_STRING)
  # The input is quoted so an empty or ;-containing string is still passed as
  # a single argument.
  string(REGEX MATCHALL "[0-9]+" VERSIONS "${VERSION_STRING}")
  list(LENGTH VERSIONS VERSION_COUNT)

  # Start from a known value so nothing inherited from the caller's scope can
  # leak into the result when no digits are found.
  set(TEMP_VERSION_STRING "")

  if(VERSION_COUNT GREATER 0)
    list(GET VERSIONS 0 MAJOR)
    set(VERSION_MAJOR ${MAJOR} PARENT_SCOPE)
    set(TEMP_VERSION_STRING "${MAJOR}")
  endif()

  if(VERSION_COUNT GREATER 1)
    list(GET VERSIONS 1 MINOR)
    set(VERSION_MINOR ${MINOR} PARENT_SCOPE)
    string(APPEND TEMP_VERSION_STRING ".${MINOR}")
  endif()

  if(VERSION_COUNT GREATER 2)
    list(GET VERSIONS 2 PATCH)
    set(VERSION_PATCH ${PATCH} PARENT_SCOPE)
    string(APPEND TEMP_VERSION_STRING ".${PATCH}")
  endif()

  set(VERSION_STRING "${TEMP_VERSION_STRING}" PARENT_SCOPE)
endfunction()

## Gets the current version of the repository
## using versioning tags and git describe.
## Passes back a packaging version string
## and a library version string.
## Arguments:
##   DEFAULT_VERSION_STRING  version used when no matching tag can be read
##   VERSION_PREFIX          tag prefix; tags are matched as <prefix>-[0-9.]*
##   GIT                     truthy when git should be queried
## Sets VERSION_STRING, VERSION_MAJOR and VERSION_MINOR in the caller's scope.
function(get_version_from_tag DEFAULT_VERSION_STRING VERSION_PREFIX GIT)
  parse_version("${DEFAULT_VERSION_STRING}")

  if(GIT)
    execute_process(COMMAND git describe --tags --dirty --long --match ${VERSION_PREFIX}-[0-9.]*
                    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
                    OUTPUT_VARIABLE GIT_TAG_STRING
                    OUTPUT_STRIP_TRAILING_WHITESPACE
                    RESULT_VARIABLE RESULT)
    # RESULT is an error *string* when the command could not be started.
    # Testing the variable by name keeps such a string from being expanded
    # into the if() and aborting configuration; EQUAL is simply false then.
    if(RESULT EQUAL 0)
      parse_version("${GIT_TAG_STRING}")
    endif()
  endif()

  # Forward what parse_version() placed in this scope to our own caller.
  set(VERSION_STRING "${VERSION_STRING}" PARENT_SCOPE)
  set(VERSION_MAJOR "${VERSION_MAJOR}" PARENT_SCOPE)
  set(VERSION_MINOR "${VERSION_MINOR}" PARENT_SCOPE)
endfunction()

## Runs cmake_modules/version_util.sh to count the commits made since the
## previous package tag that starts with VERSION_PREFIX.
## Sets NUM_COMMITS in the caller's scope: the script output on success, or
## "unknown" when the script is missing or fails.
function(num_change_since_prev_pkg VERSION_PREFIX)
  find_program(get_commits NAMES version_util.sh
               PATHS ${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules)
  if(get_commits)
    execute_process(COMMAND ${get_commits} -c ${VERSION_PREFIX}
                    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
                    OUTPUT_VARIABLE NUM_COMMITS
                    OUTPUT_STRIP_TRAILING_WHITESPACE
                    RESULT_VARIABLE RESULT)
    if(RESULT EQUAL 0)
      set(NUM_COMMITS "${NUM_COMMITS}" PARENT_SCOPE)
      message("${NUM_COMMITS} were found since previous release")
    else()
      # Do not export partial or empty output from a failed run; use the same
      # placeholder as the "script not found" branch below.
      set(NUM_COMMITS "unknown" PARENT_SCOPE)
      message("Unable to determine number of commits since previous release")
    endif()
  else()
    message("WARNING: Didn't find version_util.sh")
    set(NUM_COMMITS "unknown" PARENT_SCOPE)
  endif()
endfunction()

function(get_package_version_number DEFAULT_VERSION_STRING VERSION_PREFIX GIT)
  get_version_from_tag(${DEFAULT_VERSION_STRING} ${VERSION_PREFIX} GIT)
  num_change_since_prev_pkg(${VERSION_PREFIX})

  set(PKG_VERSION_STR "${VERSION_STRING}.${NUM_COMMITS}")
  if (DEFINED ENV{ROCM_BUILD_ID})
    set(VERSION_ID $ENV{ROCM_BUILD_ID})
  else()
    set(VERSION_ID "local_build-0")
  endif()

  set(PKG_VERSION_STR "${PKG_VERSION_STR}.${VERSION_ID}")

  if (GIT)
    execute_process(COMMAND git rev-parse --short HEAD
                    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
                    OUTPUT_VARIABLE VERSION_HASH
                    OUTPUT_STRIP_TRAILING_WHITESPACE
                    RESULT_VARIABLE RESULT )
    if( ${RESULT} EQUAL 0 )
      # Check for dirty workspace.
execute_process(COMMAND git diff --quiet WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} RESULT_VARIABLE RESULT ) if(${RESULT} EQUAL 1) set(VERSION_HASH "${VERSION_HASH}-dirty") endif() else() set( VERSION_HASH "unknown" ) endif() else() set( VERSION_HASH "unknown" ) endif() set(PKG_VERSION_STR "${PKG_VERSION_STR}-${VERSION_HASH}") set(PKG_VERSION_STR ${PKG_VERSION_STR} PARENT_SCOPE) endfunction() rocminfo-rocm-6.1.2/cmake_modules/version_util.sh000077500000000000000000000015041454560623600221760ustar00rootroot00000000000000#!/bin/bash # Handle commandline args while [ "$1" != "" ]; do case $1 in -c ) # Commits since prevous tag TARGET="count" ;; * ) TARGET="count" break ;; esac shift 1 done TAG_PREFIX=$1 reg_ex="${TAG_PREFIX}*" commits_since_last_tag() { TAG_ARR=(`git tag --sort=committerdate -l ${reg_ex} | tail -2`) PREVIOUS_TAG=${TAG_ARR[0]} CURRENT_TAG=${TAG_ARR[1]} PREV_CMT_NUM=`git rev-list --count $PREVIOUS_TAG` CURR_CMT_NUM=`git rev-list --count $CURRENT_TAG` # Commits since prevous tag: if [[ -z $PREV_CMT_NUM || -z $CURR_CMT_NUM ]]; then let NUM_COMMITS="0" else let NUM_COMMITS="${CURR_CMT_NUM}-${PREV_CMT_NUM}" fi echo $NUM_COMMITS } case $TARGET in count) commits_since_last_tag ;; *) die "Invalid target $target" ;; esac exit 0 rocminfo-rocm-6.1.2/rocm_agent_enumerator000077500000000000000000000235111454560623600206140ustar00rootroot00000000000000#!/usr/bin/env python3 import os import re import subprocess import sys import time # get current working directory CWD = os.path.dirname(os.path.realpath(__file__)) ISA_TO_ID = { # Kaveri - Temporary "gfx700" : [0x1304, 0x1305, 0x1306, 0x1307, 0x1309, 0x130a, 0x130b, 0x130c, 0x130d, 0x130e, 0x130f, 0x1310, 0x1311, 0x1312, 0x1313, 0x1315, 0x1316, 0x1317, 0x1318, 0x131b, 0x131c, 0x131d], # Hawaii "gfx701" : [0x67a0, 0x67a1, 0x67a2, 0x67a8, 0x67a9, 0x67aa, 0x67b0, 0x67b1, 0x67b8, 0x67b9, 0x67ba, 0x67be], # Carrizo "gfx801" : [0x9870, 0x9874, 0x9875, 0x9876, 0x9877, 0x98e4], # Tonga "gfx802" : [0x6920, 0x6921, 0x6928, 
0x6929, 0x692b, 0x692f, 0x6930, 0x6938, 0x6939], # Fiji "gfx803" : [0x7300, 0x730f, # Polaris10 0x67c0, 0x67c1, 0x67c2, 0x67c4, 0x67c7, 0x67c8, 0x67c9, 0x67ca, 0x67cc, 0x67cf, 0x6fdf, # Polaris11 0x67d0, 0x67df, 0x67e0, 0x67e1, 0x67e3, 0x67e7, 0x67e8, 0x67e9, 0x67eb, 0x67ef, 0x67ff, # Polaris12 0x6980, 0x6981, 0x6985, 0x6986, 0x6987, 0x6995, 0x6997, 0x699f, # VegaM 0x694c, 0x694e, 0x694f], # Vega10 "gfx900" : [0x6860, 0x6861, 0x6862, 0x6863, 0x6864, 0x6867, 0x6868, 0x6869, 0x6869, 0x686a, 0x686b, 0x686c, 0x686d, 0x686e, 0x686f, 0x687f], # Raven "gfx902" : [0x15dd, 0x15d8], # Vega12 "gfx904" : [0x69a0, 0x69a1, 0x69a2, 0x69a3, 0x69af], # Vega20 "gfx906" : [0x66a0, 0x66a1, 0x66a2, 0x66a3, 0x66a4, 0x66a7, 0x66af], # Arcturus "gfx908" : [0x738c, 0x7388, 0x738e, 0x7390], # Aldebaran "gfx90a" : [0x7408, 0x740c, 0x740f, 0x7410], # Renoir "gfx90c" : [0x15e7, 0x1636, 0x1638, 0x164c], # Navi10 "gfx1010" : [0x7310, 0x7312, 0x7318, 0x7319, 0x731a, 0x731b, 0x731e, 0x731f], # Navi12 "gfx1011" : [0x7360, 0x7362], # Navi14 "gfx1012" : [0x7340, 0x7341, 0x7347, 0x734f], # Cyan_Skillfish "gfx1013" : [0x13f9, 0x13fa, 0x13fb, 0x13fc, 0x13f3], # Sienna_Cichlid "gfx1030" : [0x73a0, 0x73a1, 0x73a2, 0x73a3, 0x73a5, 0x73a8, 0x73a9, 0x73ab, 0x73ac, 0x73ad, 0x73ae, 0x73af, 0x73bf], # Navy_Flounder "gfx1031" : [0x73c0, 0x73c1, 0x73c3, 0x73da, 0x73db, 0x73dc, 0x73dd, 0x73de, 0x73df], # Dimgray_Cavefish "gfx1032" : [0x73e0, 0x73e1, 0x73e2, 0x73e3, 0x73e8, 0x73e9, 0x73ea, 0x73eb, 0x73ec, 0x73ed, 0x73ef, 0x73ff], # Van Gogh "gfx1033" : [0x163f], # Beige_Goby "gfx1034" : [0x7420, 0x7421, 0x7422, 0x7423, 0x743f], # Yellow_Carp "gfx1035" : [0x164d, 0x1681] } def staticVars(**kwargs): def deco(func): for k in kwargs: setattr(func, k, kwargs[k]) return func return deco @staticVars(search_term=re.compile("gfx[0-9a-fA-F]+")) def getGCNISA(line, match_from_beginning = False): if match_from_beginning is True: result = getGCNISA.search_term.match(line) else: result = getGCNISA.search_term.search(line) if 
result is not None: return result.group(0) return None @staticVars(search_name=re.compile(r"gfx[0-9a-fA-F]+(:[-+:\w]+)?")) def getGCNArchName(line): result = getGCNArchName.search_name.search(line) if result is not None: return result.group(0) return None def readFromTargetLstFile(): target_list = [] # locate target.lst using environment variable or # it should be placed at the same directory with this script target_lst_path = os.environ.get("ROCM_TARGET_LST"); if target_lst_path == None: target_lst_path = os.path.join(CWD, "target.lst") if os.path.isfile(target_lst_path): target_lst_file = open(target_lst_path, 'r') for line in target_lst_file: # for target.lst match from beginning so targets can be disabled by # commenting it out target = getGCNISA(line, match_from_beginning = True) if target is not None: target_list.append(target) return target_list def readFromROCMINFO(search_arch_name = False): target_list = [] # locate rocminfo binary which should be placed at the same directory with # this script rocminfo_executable = os.path.join(CWD, "rocminfo") try: t0 = time.time() while 1: t1 = time.time() # quit after retrying rocminfo for a minute. if t1 - t0 > 60.0: print("Timeout querying rocminfo. 
Are you compiling with more than 254 threads?") break # run rocminfo rocminfo_output = subprocess.Popen(rocminfo_executable, stdout=subprocess.PIPE).communicate()[0].decode("utf-8").split('\n') term1 = re.compile("Cannot allocate memory") term2 = re.compile("HSA_STATUS_ERROR_OUT_OF_RESOURCES") done = 1 for line in rocminfo_output: if term1.search(line) is not None or term2.search(line) is not None: done = 0 break if done: break except: rocminfo_output = [] # search AMDGCN gfx ISA if search_arch_name is True: line_search_term = re.compile(r"\A\s+Name:\s+(amdgcn-amd-amdhsa--gfx\d+)") else: line_search_term = re.compile(r"\A\s+Name:\s+(gfx\d+)") for line in rocminfo_output: if line_search_term.match(line) is not None: if search_arch_name is True: target = getGCNArchName(line) else: target = getGCNISA(line) if target is not None: target_list.append(target) return target_list def readFromLSPCI(): target_list = [] try: # run lspci lspci_output = subprocess.Popen(["/usr/bin/lspci", "-n", "-d", "1002:"], stdout=subprocess.PIPE).communicate()[0].decode("utf-8").split('\n') except: lspci_output = [] target_search_term = re.compile(r"1002:\w+") for line in lspci_output: search_result = target_search_term.search(line) if search_result is not None: device_id = int(search_result.group(0).split(':')[1], 16) # try lookup from ISA_TO_ID dict for target in ISA_TO_ID.keys(): for target_device_id in ISA_TO_ID[target]: if device_id == target_device_id: target_list.append(target) break return target_list def readFromKFD(): target_list = [] topology_dir = '/sys/class/kfd/kfd/topology/nodes/' if os.path.isdir(topology_dir): for node in sorted(os.listdir(topology_dir)): node_path = os.path.join(topology_dir, node) if os.path.isdir(node_path): prop_path = node_path + '/properties' if os.path.isfile(prop_path) and os.access(prop_path, os.R_OK): target_search_term = re.compile("gfx_target_version.+") with open(prop_path) as f: try: line = f.readline() except PermissionError: # We may have a 
subsystem (e.g. scheduler) limiting device visibility which # could cause a permission error. line = '' while line != '' : search_result = target_search_term.search(line) if search_result is not None: device_id = int(search_result.group(0).split(' ')[1], 10) if device_id != 0: gfx_override = os.environ.get("HSA_OVERRIDE_GFX_VERSION") if gfx_override is not None: try: override_tokens = gfx_override.split('.') major_ver=int(override_tokens[0]) minor_ver=int(override_tokens[1]) stepping_ver=int(override_tokens[2]) if major_ver > 63 or minor_ver > 255 or stepping_ver > 255: print('Invalid HSA_OVERRIDE_GFX_VERSION value') major_ver = 0 minor_ver = 0 stepping_ver = 0 except Exception as e: print('Invalid HSA_OVERRIDE_GFX_VERSION format expected \"1.2.3\"') major_ver = 0 minor_ver = 0 stepping_ver = 0 else: major_ver = int((device_id / 10000) % 100) minor_ver = int((device_id / 100) % 100) stepping_ver = int(device_id % 100) target_list.append("gfx" + format(major_ver, 'd') + format(minor_ver, 'x') + format(stepping_ver, 'x')) line = f.readline() return target_list def main(): if len(sys.argv) == 2 and sys.argv[1] == '-name' : """ Prints the list of available AMD GCN target names extracted from rocminfo, a tool shipped with this script to enumerate GPU agents available on a working ROCm stack.""" target_list = readFromROCMINFO(True) else: """Prints the list of available AMD GCN ISA The program collects the list in 3 different ways, in the order of precendence: 1. ROCM_TARGET_LST : a user defined environment variable, set to the path and filename where to find the "target.lst" file. This can be used in an install environment with sandbox, where execution of "rocminfo" is not possible. 2. target.lst : user-supplied text file. This is used in a container setting where ROCm stack may usually not available. 3. HSA topology : gathers the information from the HSA node topology in /sys/class/kfd/kfd/topology/nodes/ 4. 
lspci : enumerate PCI bus and locate supported devices from a hard-coded lookup table. 5. rocminfo : a tool shipped with this script to enumerate GPU agents available on a working ROCm stack. """ target_list = readFromTargetLstFile() if len(target_list) == 0: target_list = readFromKFD() if len(target_list) == 0: target_list = readFromLSPCI() if len(target_list) == 0: target_list = readFromROCMINFO() # workaround to cope with existing rocm_agent_enumerator behavior where gfx000 # would always be returned print("gfx000") for gfx in target_list: print(gfx) if __name__ == "__main__": main() rocminfo-rocm-6.1.2/rocminfo.cc000077500000000000000000001257431454560623600164470ustar00rootroot00000000000000/* * ============================================================================= * ROC Runtime Conformance Release License * ============================================================================= * The University of Illinois/NCSA * Open Source License (NCSA) * * Copyright (c) 2017, Advanced Micro Devices, Inc. * All rights reserved. * * Developed by: * * AMD Research and AMD ROC Software Development * * Advanced Micro Devices, Inc. * * www.amd.com * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal with the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimers. * - Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimers in * the documentation and/or other materials provided with the distribution. 
* - Neither the names of , * nor the names of its contributors may be used to endorse or promote * products derived from this Software without specific prior written * permission. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS WITH THE SOFTWARE. * */ #include #include #include #include #include #include #include #include #include #include #include #include #include "hsa/hsa.h" #include "hsa/hsa_ext_amd.h" #define COL_BLU "\x1B[34m" #define COL_KCYN "\x1B[36m" #define COL_GRN "\x1B[32m" #define COL_NRM "\x1B[0m" #define COL_RED "\x1B[31m" #define COL_MAG "\x1B[35m" #define COL_WHT "\x1B[37m" #define COL_YEL "\x1B[33m" #define COL_RESET "\033[0m" #define RET_IF_HSA_ERR(err) { \ if ((err) != HSA_STATUS_SUCCESS) { \ char err_val[12]; \ char* err_str = NULL; \ if (hsa_status_string(err, \ (const char**)&err_str) != HSA_STATUS_SUCCESS) { \ snprintf(&(err_val[0]), sizeof(err_val), "%#x", (uint32_t)err); \ err_str = &(err_val[0]); \ } \ printf("%shsa api call failure at: %s:%d\n", \ COL_RED, __FILE__, __LINE__); \ printf("%sCall returned %s\n", COL_RED, err_str); \ printf("%s", COL_RESET); \ return (err); \ } \ } // This structure holds system information acquired through hsa info related // calls, and is later used for reference when displaying the information. 
struct system_info_t { uint16_t major, minor; uint16_t ext_major, ext_minor; uint64_t timestamp_frequency = 0; uint64_t max_wait = 0; hsa_endianness_t endianness; hsa_machine_model_t machine_model; bool mwaitx_enabled; bool dmabuf_support; }; // This structure holds agent information acquired through hsa info related // calls, and is later used for reference when displaying the information. struct agent_info_t { char name[64]; char uuid[24]; char vendor_name[64]; char device_mkt_name[64]; hsa_agent_feature_t agent_feature; hsa_profile_t agent_profile; hsa_default_float_rounding_mode_t float_rounding_mode; uint32_t max_queue; uint32_t queue_min_size; uint32_t queue_max_size; hsa_queue_type_t queue_type; uint32_t node; hsa_device_type_t device_type; uint32_t cache_size[4]; uint32_t chip_id; uint32_t asic_revision; uint32_t cacheline_size; uint32_t max_clock_freq; uint32_t internal_node_id; uint32_t max_addr_watch_pts; // HSA_AMD_AGENT_INFO_MEMORY_WIDTH is deprecated, so exclude // uint32_t mem_max_freq; Not supported by get_info uint32_t compute_unit; uint32_t wavefront_size; uint32_t workgroup_max_size; uint32_t grid_max_size; uint32_t fbarrier_max_size; uint32_t max_waves_per_cu; uint32_t simds_per_cu; uint32_t shader_engs; uint32_t shader_arrs_per_sh_eng; hsa_isa_t agent_isa; hsa_dim3_t grid_max_dim; uint16_t workgroup_max_dim[3]; uint16_t bdf_id; bool fast_f16; bool coherent_host_access; uint32_t pkt_processor_ucode_ver; uint32_t sdma_ucode_ver; hsa_amd_iommu_version_t iommu_support; }; // This structure holds memory pool information acquired through hsa info // related calls, and is later used for reference when displaying the // information. 
typedef struct { uint32_t segment; size_t pool_size; bool alloc_allowed; size_t alloc_granule; size_t alloc_rec_granule; size_t pool_alloc_alignment; bool pl_access; uint32_t global_flag; } pool_info_t; // This structure holds ISA information acquired through hsa info // related calls, and is later used for reference when displaying the // information. struct isa_info_t { char *name_str; uint32_t workgroup_max_size; hsa_dim3_t grid_max_dim; uint64_t grid_max_size; uint32_t fbarrier_max_size; uint16_t workgroup_max_dim[3]; bool def_rounding_modes[3]; bool base_rounding_modes[3]; bool mach_models[2]; bool profiles[2]; bool fast_f16; }; // This structure holds cache information acquired through hsa info // related calls, and is later used for reference when displaying the // information. struct cache_info_t { char *name_str; uint8_t level; uint32_t size; }; static const uint32_t kLabelFieldSize = 25; static const uint32_t kValueFieldSize = 35; static const uint32_t kIndentSize = 2; enum rocmi_int_format { ROCMI_INT_FORMAT_DEC = 1, ROCMI_INT_FORMAT_HEX = 2, }; // Make the most common format the default std::string int_to_string(uint32_t i, uint32_t fmt = ROCMI_INT_FORMAT_DEC|ROCMI_INT_FORMAT_HEX) { std::stringstream sd; bool need_parens = false; if (fmt & ROCMI_INT_FORMAT_DEC) { if (need_parens) { sd << "("; } sd << i; if (need_parens) { sd << ") "; } need_parens = true; } if (fmt & ROCMI_INT_FORMAT_HEX) { if (need_parens) { sd << "(0x"; } sd << std::hex << i; if (need_parens) { sd << ") "; } } return sd.str(); } static void printLabelInt(char const *l, int d, uint32_t indent_lvl = 0) { std::string ind(kIndentSize * indent_lvl, ' '); printf("%s%-*s%-*d\n", ind.c_str(), kLabelFieldSize, l, kValueFieldSize, d); } static void printLabelStr(char const *l, char const *s, uint32_t indent_lvl = 0) { std::string ind(kIndentSize * indent_lvl, ' '); printf("%s%-*s%-*s\n", ind.c_str(), kLabelFieldSize, l, kValueFieldSize, s); } static void printLabelStr(char const *l, std::string 
const &s, uint32_t indent_lvl = 0) { std::string ind(kIndentSize * indent_lvl, ' '); printf("%s%-*s%-*s\n", ind.c_str(), kLabelFieldSize, l, kValueFieldSize, s.c_str()); } static void printLabel(char const *l, bool newline = false, uint32_t indent_lvl = 0) { std::string ind(kIndentSize * indent_lvl, ' '); printf("%s%-*s", ind.c_str(), kLabelFieldSize, l); if (newline) { printf("\n"); } } static void printValueStr(char const *s, bool newline = true) { printf("%-*s\n", kValueFieldSize, s); } // Acquire system information static hsa_status_t AcquireSystemInfo(system_info_t *sys_info) { hsa_status_t err; // Get Major and Minor version of runtime err = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MAJOR, &sys_info->major); RET_IF_HSA_ERR(err); err = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MINOR, &sys_info->minor); RET_IF_HSA_ERR(err); // Get HSA Ext Interface version err = hsa_system_get_info(HSA_AMD_SYSTEM_INFO_EXT_VERSION_MAJOR, &sys_info->ext_major); RET_IF_HSA_ERR(err); err = hsa_system_get_info(HSA_AMD_SYSTEM_INFO_EXT_VERSION_MINOR, &sys_info->ext_minor); RET_IF_HSA_ERR(err); // Get timestamp frequency err = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sys_info->timestamp_frequency); RET_IF_HSA_ERR(err); // Get maximum duration of a signal wait operation err = hsa_system_get_info(HSA_SYSTEM_INFO_SIGNAL_MAX_WAIT, &sys_info->max_wait); RET_IF_HSA_ERR(err); // Get Endianness of the system err = hsa_system_get_info(HSA_SYSTEM_INFO_ENDIANNESS, &sys_info->endianness); RET_IF_HSA_ERR(err); // Get machine model info err = hsa_system_get_info(HSA_SYSTEM_INFO_MACHINE_MODEL, &sys_info->machine_model); RET_IF_HSA_ERR(err); // Get mwaitx mode err = hsa_system_get_info(HSA_AMD_SYSTEM_INFO_MWAITX_ENABLED, &sys_info->mwaitx_enabled); // Get DMABuf support err = hsa_system_get_info(HSA_AMD_SYSTEM_INFO_DMABUF_SUPPORTED, &sys_info->dmabuf_support); RET_IF_HSA_ERR(err); return err; } static void DisplaySystemInfo(system_info_t const *sys_info) { printLabel("Runtime 
Version:"); printf("%d.%d\n", sys_info->major, sys_info->minor); printLabel("Runtime Ext Version:"); printf("%d.%d\n", sys_info->ext_major, sys_info->ext_minor); printLabel("System Timestamp Freq.:"); printf("%fMHz\n", sys_info->timestamp_frequency / 1e6); printLabel("Sig. Max Wait Duration:"); printf("%lu (0x%lX) (timestamp count)\n", sys_info->max_wait, sys_info->max_wait); printLabel("Machine Model:"); if (HSA_MACHINE_MODEL_SMALL == sys_info->machine_model) { printValueStr("SMALL"); } else if (HSA_MACHINE_MODEL_LARGE == sys_info->machine_model) { printValueStr("LARGE"); } printLabel("System Endianness:"); if (HSA_ENDIANNESS_LITTLE == sys_info->endianness) { printValueStr("LITTLE"); } else if (HSA_ENDIANNESS_BIG == sys_info->endianness) { printValueStr("BIG"); } printLabel("Mwaitx:"); printf("%s\n", sys_info->mwaitx_enabled ? "ENABLED" : "DISABLED"); printLabel("DMAbuf Support:"); printf("%s\n", sys_info->dmabuf_support ? "YES" : "NO"); printf("\n"); } static hsa_status_t AcquireAgentInfo(hsa_agent_t agent, agent_info_t *agent_i) { hsa_status_t err; // Get agent name and vendor err = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_i->name); RET_IF_HSA_ERR(err); // Get UUID, an Ascii string, of a ROCm device err = hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_UUID, &agent_i->uuid); // Get device's vendor name err = hsa_agent_get_info(agent, HSA_AGENT_INFO_VENDOR_NAME, &agent_i->vendor_name); RET_IF_HSA_ERR(err); // Get device marketing name err = hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_PRODUCT_NAME, &agent_i->device_mkt_name); RET_IF_HSA_ERR(err); // Get agent feature err = hsa_agent_get_info(agent, HSA_AGENT_INFO_FEATURE, &agent_i->agent_feature); RET_IF_HSA_ERR(err); // Get profile supported by the agent err = hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_i->agent_profile); RET_IF_HSA_ERR(err); // Get floating-point rounding mode err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE, 
&agent_i->float_rounding_mode); RET_IF_HSA_ERR(err); // Get max number of queue err = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUES_MAX, &agent_i->max_queue); RET_IF_HSA_ERR(err); // Get queue min size err = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MIN_SIZE, &agent_i->queue_min_size); RET_IF_HSA_ERR(err); // Get queue max size err = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &agent_i->queue_max_size); RET_IF_HSA_ERR(err); // Get queue type err = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_TYPE, &agent_i->queue_type); RET_IF_HSA_ERR(err); // Get agent node err = hsa_agent_get_info(agent, HSA_AGENT_INFO_NODE, &agent_i->node); RET_IF_HSA_ERR(err); // Get device type err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &agent_i->device_type); RET_IF_HSA_ERR(err); if (HSA_DEVICE_TYPE_GPU == agent_i->device_type) { err = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &agent_i->agent_isa); RET_IF_HSA_ERR(err); } // Get cache size err = hsa_agent_get_info(agent, HSA_AGENT_INFO_CACHE_SIZE, agent_i->cache_size); RET_IF_HSA_ERR(err); // Get chip id err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_CHIP_ID, &agent_i->chip_id); RET_IF_HSA_ERR(err); // Get asic revision err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_ASIC_REVISION, &agent_i->asic_revision); RET_IF_HSA_ERR(err); // Get cacheline size err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_CACHELINE_SIZE, &agent_i->cacheline_size); RET_IF_HSA_ERR(err); // Get Max clock frequency err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY, &agent_i->max_clock_freq); RET_IF_HSA_ERR(err); // Internal Driver node ID err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_DRIVER_NODE_ID, &agent_i->internal_node_id); RET_IF_HSA_ERR(err); // Max number of watch points on mem. addr. 
ranges to generate exeception // events err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_MAX_ADDRESS_WATCH_POINTS, &agent_i->max_addr_watch_pts); RET_IF_HSA_ERR(err); // Get Agent BDFID err = hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_BDFID, &agent_i->bdf_id); RET_IF_HSA_ERR(err); // Get Max Memory Clock // Not supported by hsa_agent_get_info // err = hsa_agent_get_info(agent,d // (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MEMORY_MAX_FREQUENCY, // &agent_i->mem_max_freq); // RET_IF_HSA_ERR(err); // Get Num SIMDs per CU err = hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU, &agent_i->simds_per_cu); RET_IF_HSA_ERR(err); // Get Num Shader Engines err = hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES, &agent_i->shader_engs); RET_IF_HSA_ERR(err); // Get Num Shader Arrays per Shader engine err = hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE, &agent_i->shader_arrs_per_sh_eng); RET_IF_HSA_ERR(err); // Get number of Compute Unit err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &agent_i->compute_unit); RET_IF_HSA_ERR(err); // Get coherent Host access err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_SVM_DIRECT_HOST_ACCESS, &agent_i->coherent_host_access); RET_IF_HSA_ERR(err); // Check if the agent is kernel agent if (agent_i->agent_feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH) { // Get flaf of fast_f16 operation err = hsa_agent_get_info(agent, HSA_AGENT_INFO_FAST_F16_OPERATION, &agent_i->fast_f16); RET_IF_HSA_ERR(err); // Get wavefront size err = hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &agent_i->wavefront_size); RET_IF_HSA_ERR(err); // Get max total number of work-items in a workgroup err = hsa_agent_get_info(agent, HSA_AGENT_INFO_WORKGROUP_MAX_SIZE, &agent_i->workgroup_max_size); RET_IF_HSA_ERR(err); // Get max number of work-items of each dimension of a 
work-group err = hsa_agent_get_info(agent, HSA_AGENT_INFO_WORKGROUP_MAX_DIM, &agent_i->workgroup_max_dim); RET_IF_HSA_ERR(err); // Get max number of a grid per dimension err = hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_DIM, &agent_i->grid_max_dim); RET_IF_HSA_ERR(err); // Get max total number of work-items in a grid err = hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_SIZE, &agent_i->grid_max_size); RET_IF_HSA_ERR(err); // Get max number of fbarriers per work group err = hsa_agent_get_info(agent, HSA_AGENT_INFO_FBARRIER_MAX_SIZE, &agent_i->fbarrier_max_size); RET_IF_HSA_ERR(err); err = hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU, &agent_i->max_waves_per_cu); RET_IF_HSA_ERR(err); err = hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_UCODE_VERSION, &agent_i->pkt_processor_ucode_ver); RET_IF_HSA_ERR(err); err = hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_SDMA_UCODE_VERSION, &agent_i->sdma_ucode_ver); RET_IF_HSA_ERR(err); err = hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_IOMMU_SUPPORT, &agent_i->iommu_support); RET_IF_HSA_ERR(err); } return err; } static void DisplayAgentInfo(agent_info_t *agent_i) { printLabelStr("Name:", agent_i->name, 1); printLabelStr("Uuid:", agent_i->uuid, 1); printLabelStr("Marketing Name:", agent_i->device_mkt_name, 1); printLabelStr("Vendor Name:", agent_i->vendor_name, 1); printLabel("Feature:", false, 1); if (agent_i->agent_feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH && agent_i->agent_feature & HSA_AGENT_FEATURE_AGENT_DISPATCH) { printValueStr("KERNEL_DISPATCH & AGENT_DISPATCH"); } else if (agent_i->agent_feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH) { printValueStr("KERNEL_DISPATCH"); } else if (agent_i->agent_feature & HSA_AGENT_FEATURE_AGENT_DISPATCH) { printValueStr("AGENT_DISPATCH"); } else { printValueStr("None specified"); } printLabel("Profile:", false, 1); if (HSA_PROFILE_BASE == agent_i->agent_profile) { printValueStr("BASE_PROFILE"); 
} else if (HSA_PROFILE_FULL == agent_i->agent_profile) { printValueStr("FULL_PROFILE"); } else { printValueStr("Unknown"); } printLabel("Float Round Mode:", false, 1); if (HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO == agent_i->float_rounding_mode) { printValueStr("ZERO"); } else if (HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR == agent_i->float_rounding_mode) { printValueStr("NEAR"); } else { printValueStr("Not Supported"); } printLabelStr("Max Queue Number:", int_to_string(agent_i->max_queue), 1); printLabelStr("Queue Min Size:", int_to_string(agent_i->queue_min_size), 1); printLabelStr("Queue Max Size:", int_to_string(agent_i->queue_max_size), 1); if (HSA_QUEUE_TYPE_MULTI == agent_i->queue_type) { printLabelStr("Queue Type:", "MULTI", 1); } else if (HSA_QUEUE_TYPE_SINGLE == agent_i->queue_type) { printLabelStr("Queue Type:", "SINGLE", 1); } else { printLabelStr("Queue Type:", "Unknown", 1); } printLabelInt("Node:", agent_i->node, 1); printLabel("Device Type:", false, 1); if (HSA_DEVICE_TYPE_CPU == agent_i->device_type) { printValueStr("CPU"); } else if (HSA_DEVICE_TYPE_GPU == agent_i->device_type) { printValueStr("GPU"); } else { printValueStr("DSP"); } printLabel("Cache Info:", true, 1); for (int i = 0; i < 4; i++) { if (agent_i->cache_size[i]) { std::string tmp_str("L"); tmp_str += std::to_string(i+1); tmp_str += ":"; printLabel(tmp_str.c_str(), false, 2); // tmp_str = std::to_string(agent_i->cache_size[i]/1024); tmp_str = int_to_string(agent_i->cache_size[i]/1024); tmp_str += "KB"; printValueStr(tmp_str.c_str()); } } printLabelStr("Chip ID:", int_to_string(agent_i->chip_id), 1); printLabelStr("ASIC Revision:", int_to_string(agent_i->asic_revision), 1); printLabelStr("Cacheline Size:", int_to_string(agent_i->cacheline_size), 1); printLabelInt("Max Clock Freq. 
(MHz):", agent_i->max_clock_freq, 1); printLabelInt("BDFID:", agent_i->bdf_id, 1); printLabelInt("Internal Node ID:", agent_i->internal_node_id, 1); printLabelInt("Compute Unit:", agent_i->compute_unit, 1); printLabelInt("SIMDs per CU:", agent_i->simds_per_cu, 1); printLabelInt("Shader Engines:", agent_i->shader_engs, 1); printLabelInt("Shader Arrs. per Eng.:", agent_i->shader_arrs_per_sh_eng, 1); printLabelInt("WatchPts on Addr. Ranges:", agent_i->max_addr_watch_pts, 1); if (agent_i->device_type == HSA_DEVICE_TYPE_GPU) printLabelStr("Coherent Host Access:", agent_i->coherent_host_access ? "TRUE":"FALSE", 1); printLabel("Features:", false, 1); if (agent_i->agent_feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH) { printf("%s", "KERNEL_DISPATCH "); } if (agent_i->agent_feature & HSA_AGENT_FEATURE_AGENT_DISPATCH) { printf("%s", "AGENT_DISPATCH"); } if (agent_i->agent_feature == 0) { printf("None"); } printf("\n"); if (agent_i->agent_feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH) { printLabelStr("Fast F16 Operation:", agent_i->fast_f16 ? 
"TRUE":"FALSE", 1); printLabelStr("Wavefront Size:", int_to_string(agent_i->wavefront_size), 1); printLabelStr("Workgroup Max Size:", int_to_string(agent_i->workgroup_max_size), 1); printLabel("Workgroup Max Size per Dimension:", true, 1); printLabelStr("x", int_to_string(static_cast(agent_i->workgroup_max_dim[0])), 2); printLabelStr("y", int_to_string(static_cast(agent_i->workgroup_max_dim[1])), 2); printLabelStr("z", int_to_string(static_cast(agent_i->workgroup_max_dim[2])), 2); printLabelStr("Max Waves Per CU:", int_to_string(agent_i->max_waves_per_cu), 1); printLabelStr("Max Work-item Per CU:", int_to_string(agent_i->wavefront_size*agent_i->max_waves_per_cu), 1); printLabelStr("Grid Max Size:", int_to_string(agent_i->grid_max_size), 1); printLabel("Grid Max Size per Dimension:", true, 1); printLabelStr("x", int_to_string(agent_i->grid_max_dim.x), 2); printLabelStr("y", int_to_string(agent_i->grid_max_dim.y), 2); printLabelStr("z", int_to_string(agent_i->grid_max_dim.z), 2); printLabelInt("Max fbarriers/Workgrp:", agent_i->fbarrier_max_size, 1); printLabelInt("Packet Processor uCode::", agent_i->pkt_processor_ucode_ver, 1); printLabelInt("SDMA engine uCode::", agent_i->sdma_ucode_ver, 1); printLabelStr("IOMMU Support::", agent_i->iommu_support == HSA_IOMMU_SUPPORT_V2 ? 
"V2" : "None", 1); } } static hsa_status_t AcquirePoolInfo(hsa_amd_memory_pool_t pool, pool_info_t *pool_i) { hsa_status_t err; err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &pool_i->global_flag); RET_IF_HSA_ERR(err); err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &pool_i->segment); RET_IF_HSA_ERR(err); // Get the size of the POOL err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &pool_i->pool_size); RET_IF_HSA_ERR(err); err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &pool_i->alloc_allowed); RET_IF_HSA_ERR(err); err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &pool_i->alloc_granule); RET_IF_HSA_ERR(err); err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_REC_GRANULE, &pool_i->alloc_rec_granule); RET_IF_HSA_ERR(err); err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT, &pool_i->pool_alloc_alignment); RET_IF_HSA_ERR(err); err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, &pool_i->pl_access); RET_IF_HSA_ERR(err); return HSA_STATUS_SUCCESS; } static void MakeGlobalFlagsString(uint32_t global_flag, std::string* out_str) { *out_str = ""; std::vector flags; if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT & global_flag) { flags.push_back("KERNARG"); } if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED & global_flag) { flags.push_back("FINE GRAINED"); } if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED & global_flag) { flags.push_back("COARSE GRAINED"); } if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_EXTENDED_SCOPE_FINE_GRAINED & global_flag) { flags.push_back("EXTENDED FINE GRAINED"); } if (flags.size() > 0) { *out_str += flags[0]; } for (size_t i = 1; i < flags.size(); i++) { *out_str += ", " + flags[i]; } } static void DumpSegment(pool_info_t *pool_i, uint32_t ind_lvl) { std::string seg_str; std::string tmp_str; 
printLabel("Segment:", false, ind_lvl); switch (pool_i->segment) { case HSA_AMD_SEGMENT_GLOBAL: MakeGlobalFlagsString(pool_i->global_flag, &tmp_str); seg_str += "GLOBAL; FLAGS: " + tmp_str; break; case HSA_AMD_SEGMENT_READONLY: seg_str += "READONLY"; break; case HSA_AMD_SEGMENT_PRIVATE: seg_str += "PRIVATE"; break; case HSA_AMD_SEGMENT_GROUP: seg_str += "GROUP"; break; default: printf("Not Supported\n"); break; } printValueStr(seg_str.c_str()); } static void DisplayPoolInfo(pool_info_t *pool_i, uint32_t indent) { DumpSegment(pool_i, indent); size_t sz = pool_i->pool_size/1024; printLabelStr("Size:", int_to_string(sz) + "KB", indent); printLabelStr("Allocatable:", (pool_i->alloc_allowed ? "TRUE" : "FALSE"), indent); std::string gr_str = std::to_string(pool_i->alloc_granule/1024)+"KB"; printLabelStr("Alloc Granule:", gr_str.c_str(), indent); std::string rgr_str = std::to_string(pool_i->alloc_rec_granule / 1024) + "KB"; printLabelStr("Alloc Recommended Granule:", rgr_str.c_str(), indent); std::string al_str = std::to_string(pool_i->pool_alloc_alignment/1024)+"KB"; printLabelStr("Alloc Alignment:", al_str.c_str(), indent); printLabelStr("Accessible by all:", (pool_i->pl_access ? 
"TRUE" : "FALSE"), indent); } static hsa_status_t AcquireAndDisplayMemPoolInfo(const hsa_amd_memory_pool_t pool, uint32_t indent) { hsa_status_t err; pool_info_t pool_i; err = AcquirePoolInfo(pool, &pool_i); RET_IF_HSA_ERR(err); DisplayPoolInfo(&pool_i, 3); return err; } static hsa_status_t get_pool_info(hsa_amd_memory_pool_t pool, void* data) { hsa_status_t err; int* p_int = reinterpret_cast(data); (*p_int)++; std::string pool_str("Pool "); pool_str += std::to_string(*p_int); printLabel(pool_str.c_str(), true, 2); err = AcquireAndDisplayMemPoolInfo(pool, 3); RET_IF_HSA_ERR(err); return err; } static hsa_status_t AcquireISAInfo(hsa_isa_t isa, isa_info_t *isa_i) { hsa_status_t err; uint32_t name_len; err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_NAME_LENGTH, &name_len); RET_IF_HSA_ERR(err); isa_i->name_str = new char[name_len]; if (isa_i->name_str == nullptr) { return HSA_STATUS_ERROR_OUT_OF_RESOURCES; } err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_NAME, isa_i->name_str); RET_IF_HSA_ERR(err); err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_MACHINE_MODELS, isa_i->mach_models); RET_IF_HSA_ERR(err); err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_PROFILES, isa_i->profiles); RET_IF_HSA_ERR(err); err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_DEFAULT_FLOAT_ROUNDING_MODES, isa_i->def_rounding_modes); RET_IF_HSA_ERR(err); err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES, isa_i->base_rounding_modes); RET_IF_HSA_ERR(err); err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_FAST_F16_OPERATION, &isa_i->fast_f16); RET_IF_HSA_ERR(err); err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_WORKGROUP_MAX_DIM, &isa_i->workgroup_max_dim); RET_IF_HSA_ERR(err); err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_WORKGROUP_MAX_SIZE, &isa_i->workgroup_max_size); RET_IF_HSA_ERR(err); err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_GRID_MAX_DIM, &isa_i->grid_max_dim); RET_IF_HSA_ERR(err); err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_GRID_MAX_SIZE, &isa_i->grid_max_size); 
RET_IF_HSA_ERR(err); err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_FBARRIER_MAX_SIZE, &isa_i->fbarrier_max_size); RET_IF_HSA_ERR(err); return err; } static void DisplayISAInfo(isa_info_t *isa_i, uint32_t indent) { printLabelStr("Name:", isa_i->name_str, indent); std::string models(""); if (isa_i->mach_models[HSA_MACHINE_MODEL_SMALL]) { models = "HSA_MACHINE_MODEL_SMALL "; } if (isa_i->mach_models[HSA_MACHINE_MODEL_LARGE]) { models += "HSA_MACHINE_MODEL_LARGE"; } printLabelStr("Machine Models:", models.c_str(), indent); std::string profiles(""); if (isa_i->profiles[HSA_PROFILE_BASE]) { profiles = "HSA_PROFILE_BASE "; } if (isa_i->profiles[HSA_PROFILE_FULL]) { profiles += "HSA_PROFILE_FULL"; } printLabelStr("Profiles:", profiles.c_str(), indent); std::string rounding_modes(""); if (isa_i->def_rounding_modes[HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT]) { rounding_modes = "DEFAULT "; } if (isa_i->def_rounding_modes[HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO]) { rounding_modes += "ZERO "; } if (isa_i->def_rounding_modes[HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR]) { rounding_modes += "NEAR"; } printLabelStr("Default Rounding Mode:", rounding_modes.c_str(), indent); rounding_modes = ""; if (isa_i->base_rounding_modes[HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT]) { rounding_modes = "DEFAULT "; } if (isa_i->base_rounding_modes[HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO]) { rounding_modes += "ZERO "; } if (isa_i->base_rounding_modes[HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR]) { rounding_modes += "NEAR"; } printLabelStr("Default Rounding Mode:", rounding_modes.c_str(), indent); printLabelStr("Fast f16:", (isa_i->fast_f16 ? 
"TRUE" : "FALSE"), indent); printLabelStr("Workgroup Max Size:", int_to_string(isa_i->workgroup_max_size), indent); printLabel("Workgroup Max Size per Dimension:", true, indent); printLabelStr("x", int_to_string( static_cast(isa_i->workgroup_max_dim[0])), indent+1); printLabelStr("y", int_to_string( static_cast(isa_i->workgroup_max_dim[1])), indent+1); printLabelStr("z", int_to_string( static_cast(isa_i->workgroup_max_dim[2])), indent+1); printLabelStr("Grid Max Size:", int_to_string(isa_i->grid_max_size), indent); printLabel("Grid Max Size per Dimension:", true, indent); printLabelStr("x", int_to_string(isa_i->grid_max_dim.x), indent+1); printLabelStr("y", int_to_string(isa_i->grid_max_dim.y), indent+1); printLabelStr("z", int_to_string(isa_i->grid_max_dim.z), indent+1); printLabelInt("FBarrier Max Size:", isa_i->fbarrier_max_size, indent); } static hsa_status_t AcquireAndDisplayISAInfo(const hsa_isa_t isa, uint32_t indent) { hsa_status_t err; isa_info_t isa_i; isa_i.name_str = nullptr; err = AcquireISAInfo(isa, &isa_i); RET_IF_HSA_ERR(err); DisplayISAInfo(&isa_i, 3); if (isa_i.name_str != nullptr) { delete []isa_i.name_str; } return err; } static hsa_status_t get_isa_info(hsa_isa_t isa, void* data) { hsa_status_t err; int* isa_int = reinterpret_cast(data); (*isa_int)++; std::string isa_str("ISA "); isa_str += std::to_string(*isa_int); printLabel(isa_str.c_str(), true, 2); err = AcquireAndDisplayISAInfo(isa, 3); RET_IF_HSA_ERR(err); return err; } // Cache info dump is ifdef'd out as it generates a lot of output that is // not that interesting. Define ENABLE_CACHE_DUMP if this is of interest. 
#ifdef ENABLE_CACHE_DUMP static void DisplayCacheInfo(cache_info_t *cache_i, uint32_t indent) { printLabelStr("Name:", cache_i->name_str, indent); printLabelInt("Level:", cache_i->level, indent); printLabelInt("Size:", cache_i->size, indent); } static hsa_status_t AcquireCacheInfo(hsa_cache_t cache, cache_info_t *cache_i) { hsa_status_t err; uint32_t name_len; err = hsa_cache_get_info(cache, HSA_CACHE_INFO_NAME_LENGTH, &name_len); RET_IF_HSA_ERR(err); cache_i->name_str = new char[name_len]; if (cache_i->name_str == nullptr) { return HSA_STATUS_ERROR_OUT_OF_RESOURCES; } err = hsa_cache_get_info(cache, HSA_CACHE_INFO_NAME, cache_i->name_str); RET_IF_HSA_ERR(err); err = hsa_cache_get_info(cache, HSA_CACHE_INFO_LEVEL, &cache_i->level); RET_IF_HSA_ERR(err); err = hsa_cache_get_info(cache, HSA_CACHE_INFO_SIZE, &cache_i->size); RET_IF_HSA_ERR(err); return err; } static hsa_status_t AcquireAndDisplayCacheInfo(const hsa_cache_t cache, uint32_t indent) { hsa_status_t err; cache_info_t cache_i; err = AcquireCacheInfo(cache, &cache_i); RET_IF_HSA_ERR(err); DisplayCacheInfo(&cache_i, 3); if (cache_i.name_str != nullptr) { delete []cache_i.name_str; } return err; } static hsa_status_t get_cache_info(hsa_cache_t cache, void* data) { hsa_status_t err; int* cache_int = reinterpret_cast(data); (*cache_int)++; std::string cache_str("Cache L"); cache_str += std::to_string(*cache_int); printLabel(cache_str.c_str(), true, 2); err = AcquireAndDisplayCacheInfo(cache, 3); RET_IF_HSA_ERR(err); return err; } #endif // ENABLE_CACHE_DUMP static hsa_status_t AcquireAndDisplayAgentInfo(hsa_agent_t agent, void* data) { int pool_number = 0; int isa_number = 0; hsa_status_t err; agent_info_t agent_i; int *agent_number = reinterpret_cast(data); (*agent_number)++; err = AcquireAgentInfo(agent, &agent_i); RET_IF_HSA_ERR(err); printLabel("*******", true); std::string agent_ind("Agent "); agent_ind += std::to_string(*agent_number).c_str(); printLabel(agent_ind.c_str(), true); printLabel("*******", 
true); DisplayAgentInfo(&agent_i); printLabel("Pool Info:", true, 1); err = hsa_amd_agent_iterate_memory_pools(agent, get_pool_info, &pool_number); RET_IF_HSA_ERR(err); printLabel("ISA Info:", true, 1); err = hsa_agent_iterate_isas(agent, get_isa_info, &isa_number); if (err == HSA_STATUS_ERROR_INVALID_AGENT) { printLabel("N/A", true, 2); return HSA_STATUS_SUCCESS; } RET_IF_HSA_ERR(err); #if ENABLE_CACHE_DUMP int cache_number = 0; printLabel("Cache Info:", true, 1); err = hsa_agent_iterate_caches(agent, get_cache_info, &cache_number); if (err == HSA_STATUS_ERROR_INVALID_AGENT) { printLabel("N/A", true, 2); return HSA_STATUS_SUCCESS; } #endif RET_IF_HSA_ERR(err); return HSA_STATUS_SUCCESS; } int CheckInitialState(void) { // Check kernel module for ROCk is loaded std::ifstream amdgpu_initstate("/sys/module/amdgpu/initstate"); if (amdgpu_initstate){ std::stringstream buffer; buffer << amdgpu_initstate.rdbuf(); amdgpu_initstate.close(); std::string line; bool is_live = false; while (std::getline(buffer, line)){ if (line.find( "live" ) != std::string::npos){ is_live = true; break; } } if (is_live){ std::ifstream amdgpu_version("/sys/module/amdgpu/version"); if (amdgpu_version){ std::stringstream buffer; buffer << amdgpu_version.rdbuf(); std::string vers; std::getline(buffer, vers); amdgpu_version.close(); printf("%sROCk module version %s is loaded%s\n", COL_WHT, vers.c_str(), COL_RESET); } else { printf("%sROCk module is loaded%s\n", COL_WHT, COL_RESET); } } else { printf("%sROCk module is NOT live, possibly no GPU devices%s\n", COL_RED, COL_RESET); return -1; } } else { printf("%sROCk module is NOT loaded, possibly no GPU devices%s\n", COL_RED, COL_RESET); return -1; } // Check if user belongs to the group for /dev/kfd (e.g. "video" or // "render") // @note: User who are not members of "video" // group cannot access DRM services char u_name[32]; bool member = false; struct passwd *pw; int num_groups = 0; gid_t *groups; // Check if we can open /dev/kfd as read-write. 
If not, try to // diagnose common reasons why you can't. int open_kfd = open("/dev/kfd", O_RDWR); if (open_kfd >= 0) { close(open_kfd); return 0; } printf("%sUnable to open /dev/kfd read-write: %s%s\n", COL_RED, strerror(errno), COL_RESET); const char *kfd_gr_name = NULL; struct stat sb; if (stat("/dev/kfd", &sb) == 0) { // The owner of kfd was renamed, so avoid hard-coding the // name. Check whatever group owns it. if (struct group *kfd_gr = getgrgid(sb.st_gid)) kfd_gr_name = kfd_gr->gr_name; } if (!kfd_gr_name) kfd_gr_name = "video"; struct group *gr_s = getgrnam(kfd_gr_name); // NOLINT if (gr_s == nullptr) { printf("%sFailed to get group info to check" " for %s group membership%s\n", COL_RED, kfd_gr_name, COL_RESET); return -1; } if (getlogin_r(u_name, 32)) { printf("%sFailed to get user name to check for" " %s group membership%s\n", COL_RED, kfd_gr_name, COL_RESET); return -1; } pw = getpwnam(u_name); // NOLINT if (pw == NULL) { printf("%sFailed to find pwd entry for user %s%s\n", COL_RED, u_name, COL_RESET); return -1; } (void)getgrouplist(u_name, pw->pw_gid, NULL, &num_groups); groups = new gid_t[num_groups]; if (getgrouplist(u_name, pw->pw_gid, groups, &num_groups) == -1) { printf("%sFailed to get user group list%s\n", COL_RED, COL_RESET); delete []groups; return -1; } for (int i = 0; i < num_groups; ++i) { if (gr_s->gr_gid == groups[i]) { printf("%s%s is member of %s group%s\n", COL_WHT, u_name, kfd_gr_name, COL_RESET); member = true; break; } } if (member == false) { printf("%s%s is not member of \"%s\" group, the default DRM access " "group. Users must be a member of the \"%s\" group or another" " DRM access group in order for ROCm applications to run " "successfully%s.\n", COL_RED, u_name, kfd_gr_name, kfd_gr_name, COL_RESET); } delete []groups; return -1; } // Print out all static information known to HSA about the target system. 
// Throughout this program, the Acquire-type functions make HSA calls to // interate through HSA objects and then perform HSA get_info calls to // acccumulate information about those objects. Corresponding to each // Acquire-type function is a Display* function which display the // accumulated data in a formatted way. int main(int argc, char* argv[]) { hsa_status_t err; if (CheckInitialState()) { return 1; } err = hsa_init(); RET_IF_HSA_ERR(err) // Acquire and display system information system_info_t sys_info; // This function will call HSA get_info functions to gather information // about the system. err = AcquireSystemInfo(&sys_info); RET_IF_HSA_ERR(err); printLabel("=====================", true); printLabel("HSA System Attributes", true); printLabel("=====================", true); DisplaySystemInfo(&sys_info); // Iterate through every agent and get and display their info printLabel("==========", true); printLabel("HSA Agents", true); printLabel("==========", true); uint32_t agent_ind = 0; err = hsa_iterate_agents(AcquireAndDisplayAgentInfo, &agent_ind); RET_IF_HSA_ERR(err); printLabel("*** Done ***", true); err = hsa_shut_down(); RET_IF_HSA_ERR(err); return 0; } #undef RET_IF_HSA_ERR